## References:
* https://flask-sqlalchemy.readthedocs.io/en/stable/
* https://docs.sqlalchemy.org/en/20/orm/inheritance.html#concrete-table-inheritance
* https://docs.sqlalchemy.org/en/20/_modules/examples/performance/bulk_inserts.html
* https://docs.sqlalchemy.org/en/20/orm/large_collections.html#bulk-insert-of-new-items
* https://www.restack.io/p/adding-columns-sqlalchemy-models-answer-async-bulk-insert

## Refactoring notes

- The relationship between a frequency severity model and the input premium file is missing
- The back-relationship were missing for handling properly session.delete(histolossfile) :

-

    class Analysis(CommonMixin, Base):
        """Represents an analysis entity."""
    
        id: Mapped[int] = mapped_column(primary_key=True)
        client_id: Mapped[int] = mapped_column(ForeignKey("client.id"), nullable=False)
        client: Mapped["Client"] = relationship(back_populates="analyses")
    
        histolossfiles: Mapped[List["HistoLossFile"]] = relationship(
            secondary=lambda: analysis_histolossfile_table, back_populates="analyses"
        )
        modelfiles: Mapped[List["ModelFile"]] = relationship(
            secondary=lambda: analysis_modelfile_table, back_populates="analyses"
        )

        class HistoLossFile(CommonMixin, Base):
 
        analyses: Mapped[List[Analysis]] = relationship(
            secondary=lambda: analysis_histolossfile_table, back_populates="histolossfiles"
        )

  - 

      class ModelFile(CommonMixin, Base):
            """Base class for model files."""
        
            id: Mapped[int] = mapped_column(primary_key=True)
            model_type: Mapped[str] = mapped_column(String(50), nullable=False)
            years_simulated: Mapped[int] = mapped_column(nullable=False)
        
            client_id: Mapped[int] = mapped_column(ForeignKey("client.id"), nullable=False)
            client: Mapped["Client"] = relationship(back_populates="modelfiles")
        
            yearlosses: Mapped[List["ModelYearLoss"]] = relationship(
                back_populates="modelfile",
                cascade="all, delete-orphan",
            )
        
            analyses: Mapped[List[Analysis]] = relationship(
                secondary=lambda: analysis_modelfile_table, back_populates="modelfiles"
            )
    

- Cascade delete, all for client-analysis:


    class Client(CommonMixin, Base):
        """Represents a client entity."""
    
        id: Mapped[int] = mapped_column(primary_key=True)
        name: Mapped[str] = mapped_column(String(50), nullable=False)
    
        analyses: Mapped[List["Analysis"]] = relationship(
            back_populates="client", cascade="all, delete-orphan"
        )

- The Pydantic classes FrequencyInput and SeverityInput need to be reviewed and refactored
- The attribute treshold is missing in the FrequencyModel class
- Start frequency and severity models parameters with index 0
- The threshold of frequency and severity models is that of the related frequency_severity_model => Remove attribute threshold from severity model
- Use ModelType in polymorphic identity

## Engine

### Enumerations and Dataclasses

In [1]:
from dataclasses import dataclass
from enum import Enum


class ModelType(Enum):
    """Defines the supported loss models."""

    EMPIRICAL = "empirical"
    # FREQUENCY_SEVERITY = "frequency_severity"
    FREQUENCY_SEVERITY = "frequencyseveritymodel"
    COMPOSITE_FREQUENCY_SEVERITY = "composite_frequency_severity"
    EXPOSURE_BASED = "exposure_based"


class DistributionType(Enum):
    """Defines the supported statistical distributions."""

    POISSON = "poisson"
    NEGATIVE_BINOMIAL = "negative_binomial"
    PARETO = "pareto"


class LossType(Enum):
    """Defines the loss types."""

    CAT = "cat"
    NON_CAT = "non_cat"


@dataclass
class DistributionInput:
    """
    Configuration for a statistical distribution.

    Attributes:
        dist: The distribution type (enum).
        params: Parameters specific to the distribution.
    """

    dist: DistributionType
    params: list[float]

### Functions

In [2]:
import numpy as np
from scipy.stats import poisson, nbinom, pareto


# Main function to generate model year loss
def get_modelyearloss_frequency_severity(
    threshold: float,
    frequency_input: DistributionInput,
    severity_input: DistributionInput,
    simulated_years: int,
) -> dict:
    """
    Generate loss data for a frequency-severity model over a number of simulated years.

    Args:
        threshold (float): Multiplier applied to the severity losses.
        frequency_input (DistributionInput): Distribution defining the frequency of events per year.
        severity_input (DistributionInput): Distribution defining the severity of each event.
        simulated_years (int): Number of years to simulate.

    Returns:
        dict: A dictionary with the following keys:
            - "year": List of years for each loss event.
            - "day": Random day of the year for each loss.
            - "loss": Calculated loss values.
            - "loss_type": Type of loss (e.g., catastrophic or non-catastrophic).
    """
    frequencies = generate_frequencies(frequency_input, simulated_years)
    years = generate_years(frequencies)
    loss_count = len(years)
    days = generate_days(loss_count)
    losses = threshold * get_sample_from_dist(
        severity_input.dist, severity_input.params, loss_count
    )
    loss_types = generate_loss_types(loss_count)

    modelyearloss = {
        "year": years,
        "day": days,
        "loss": losses,
        "loss_type": loss_types,
    }
    return modelyearloss


def generate_frequencies(
    frequency_input: DistributionInput,
    size: int,
) -> list[int]:
    """
    Generate a list of event frequencies based on a specified distribution.

    Args:
        frequency_input (DistributionInput): The distribution and parameters for frequency generation.
        size (int): Number of values to generate.

    Returns:
        list[int]: A list of event frequencies for each simulated year.
    """
    frequencies = get_sample_from_dist(
        frequency_input.dist, frequency_input.params, simulated_years
    )
    return frequencies


def generate_years(frequencies: list[int]) -> list[int]:
    """
    Generate a list of years for loss events based on event frequencies.

    Args:
        frequencies (list[int]): A list where each element represents the number of events in a year.

    Returns:
        list[int]: A list of years, repeated according to their respective frequencies.
    """
    years = [
        year for year, freq in enumerate(frequencies, start=1) for _ in range(freq)
    ]
    return years


def generate_days(size: int) -> np.ndarray:
    """
    Generate random days of the year for loss events.

    Args:
        size (int): Number of days to generate.

    Returns:
        np.ndarray: An array of random integers representing days (1 to 365).
    """
    days = np.random.randint(1, 366, size)
    return days


def generate_loss_types(size: int) -> np.ndarray:
    """
    Generate random loss types (catastrophic or non-catastrophic) for events.

    Args:
        size (int): Number of loss types to generate.

    Returns:
        np.ndarray: An array of randomly chosen loss types (catastrophic or non-catastrophic).
    """
    loss_types = np.random.choice([LossType.CAT.value, LossType.NON_CAT.value], size)
    return loss_types


def get_sample_from_dist(
    dist: DistributionType, params: list[float], size: int
) -> np.ndarray:
    """
    Generate a sample from the specified distribution.

    Args:
        dist (DistributionType): The distribution type (e.g., Poisson, Negative Binomial, Pareto).
        params (list[float]): Parameters for the selected distribution.
        size (int): The number of samples to generate.

    Returns:
        np.ndarray: An array of samples drawn from the specified distribution.

    Raises:
        ValueError: If the distribution is not supported.
    """
    match dist:
        case DistributionType.POISSON:
            return poisson.rvs(mu=params[0], size=size)
        case DistributionType.NEGATIVE_BINOMIAL:
            return nbinom.rvs(n=params[0], p=params[1], size=size)
        case DistributionType.PARETO:
            return pareto.rvs(b=params[0], size=size)
        case _:
            raise ValueError(f"Unsupported distribution: {dist}")


# Example usage
dist = DistributionType.PARETO  # Use Enum for distributions
params = [2]
sample = 1000 * get_sample_from_dist(dist, params, size=1_000_000)

# Output for debugging
print(f"Chosen distribution: {dist.value}")
print(f"Parameters: {params}")
print(f"Sample: {sample}")
print(f"Sample's mean: {sample.mean()}")

Chosen distribution: pareto
Parameters: [2]
Sample: [1125.5012521  1012.1873085  2370.44509137 ... 1345.28046044 2672.56236271
 2106.53195184]
Sample's mean: 2004.4712251830867


### Tests

In [3]:
import time

threshold = 1000

frequency_input = DistributionInput(
    dist=DistributionType.POISSON,
    params=[3],
)

severity_input = DistributionInput(
    dist=DistributionType.PARETO,
    params=[2],
)

simulated_years = 1_000_000

start = time.perf_counter()
dict_modelyearloss = get_modelyearloss_frequency_severity(
    threshold,
    frequency_input,
    severity_input,
    simulated_years,
)
print(f"Duration = {time.perf_counter() - start}")
print(f"Average Loss = {dict_modelyearloss['loss'].mean()}")
print(f"Frequency = {len(dict_modelyearloss['loss']) / simulated_years}")

Duration = 0.718647300000157
Average Loss = 1997.7024869338081
Frequency = 2.998155


## Backend

### Utils

In [4]:
def get_records_from_dict(data: dict) -> list[dict]:
    """
    Converts a dictionary with lists of values into a list of records.

    Args:
        data (dict): A dictionary where keys represent column names and
                     values are lists representing rows.

    Returns:
        list[dict]: A list of dictionaries, each representing a record.
    """
    records = [dict(zip(data.keys(), values)) for values in zip(*data.values())]
    return records

### Imports

In [5]:
import time

from sqlalchemy import insert, select, text
from sqlalchemy.exc import SQLAlchemyError

from database import (
    Analysis,
    Client,
    HistoLossFile,
    ModelFile,
    ModelYearLoss,
    FrequencyModel,
    SeverityModel,
    FrequencySeverityModel,
    session,
)

### Scripts

In [6]:
# Create a client

# Input data
client_name = "AXA"

try:
    client = Client(name=client_name)
    session.add(client)
    session.commit()
    print(f"Client '{client_name}' added successfully.")
except SQLAlchemyError as e:
    session.rollback()
    print(f"Database error occurred: {e}")
    raise
except Exception as e:
    session.rollback()
    print(f"An unexpected error occured: {e}")
    raise

finally:
    session.close()

Client 'AXA' added successfully.


In [7]:
# Create an analysis and associate it with a client

# Input data
client_id = 1

try:
    client = session.get_one(Client, client_id)
    analysis = Analysis()
    client.analyses.append(analysis)
    session.commit()
    print(f"Analysis added successfully")
except SQLAlchemyError as e:
    session.rollback()
    print(f"Database error occurred: {e}")
    raise
except Exception as e:
    session.rollback()
    print(f"An unexpected error occured: {e}")
    raise

finally:
    session.close()

Analysis added successfully


In [8]:
# Create a historical loss file and associate it with a client and an analysis

# Input data
analysis_id = 1

try:
    # Retrieve the analysis and the associated client
    analysis = session.get_one(Analysis, analysis_id)
    client = analysis.client

    # Create the historical loss file
    histolossfile = HistoLossFile()

    # Associate the historical loss file with the client and analysis
    client.histolossfiles.append(histolossfile)
    analysis.histolossfiles.append(histolossfile)

    session.commit()
    print(f"Historical loss file added successfully")

except SQLAlchemyError as e:
    session.rollback()
    print(f"Database error occurred: {e}")
    raise

except Exception as e:
    session.rollback()
    print(f"An unexpected error occured: {e}")
    raise

finally:
    session.close()

Historical loss file added successfully


### Functions

In [9]:
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session


def create_frequency_severity_model(
    session: Session,
    analysis_id: int,
    lossfile_id: int,
    threshold: float,
    frequency_input: DistributionInput,
    severity_input: DistributionInput,
    years_simulated: int,
) -> None:
    """
    Creates a frequency-severity model and persists related data in the database.

    Args:
        session (Session): The SQLAlchemy session to use for database operations.
        analysis_id (int): ID of the analysis to associate the model with.
        lossfile_id (int): ID of the loss file to associate the model with.
        threshold (float): Threshold parameter for the model.
        frequency_input (FrequencyInput): Input parameters for the frequency model.
        severity_input (SeverityInput): Input parameters for the severity model.
        years_simulated (int): Number of years simulated for the model.

    Raises:
        SQLAlchemyError: If a database error occurs during the process.
    """
    try:
        # Fetch analysis and client information
        analysis = session.get(Analysis, analysis_id)
        if not analysis:
            raise ValueError(f"Analysis with ID {analysis_id} not found.")
        client_id = analysis.client_id

        # Create frequency and severity models
        frequencymodel = FrequencyModel(
            **{
                f"parameter_{i}": param
                for i, param in enumerate(frequency_input.params)
            }
        )
        severitymodel = SeverityModel(
            **{f"parameter_{i}": param for i, param in enumerate(severity_input.params)}
        )

        # Prepare the frequency-severity model file
        modelfile = FrequencySeverityModel(
            model_type="frequencyseveritymodel",
            threshold=threshold,
            years_simulated=years_simulated,
            lossfile_id=lossfile_id,
            frequencymodel=frequencymodel,
            severitymodel=severitymodel,
            client_id=client_id,
        )

        # Link the model file to the analysis
        analysis.modelfiles.append(modelfile)
        session.flush()  # Flush to generate modelfile ID

        # Generate and insert the year loss data
        start = time.perf_counter()
        modelyearloss = get_modelyearloss_frequency_severity(
            threshold, frequency_input, severity_input, years_simulated
        )
        duration = time.perf_counter() - start
        print(f"Time to get modelyearloss: {duration}")
        start = time.perf_counter()
        modelyearloss_records = [
            {**loss, "modelfile_id": modelfile.id}
            for loss in get_records_from_dict(modelyearloss)
        ]
        duration = time.perf_counter() - start
        print(f"Time to get modelyearloss records: {duration}")
        start = time.perf_counter()
        session.execute(insert(ModelYearLoss), modelyearloss_records)
        duration = time.perf_counter() - start
        print(f"Time to insert modelyearloss records: {duration}")

        # Commit
        start = time.perf_counter()
        session.commit()
        duration = time.perf_counter() - start
        print(f"Time to commit: {duration}")

    except SQLAlchemyError as e:
        session.rollback()
        print(f"Database error occurred: {e}")
        raise
    except Exception as e:
        session.rollback()
        print(f"An unexpected error occurred: {e}")
        raise
    finally:
        session.close()

In [10]:
# Usage example
start = time.perf_counter()
create_frequency_severity_model(
    session,
    analysis_id=1,
    lossfile_id=1,
    threshold=1000,
    frequency_input=DistributionInput(
        dist=DistributionType.POISSON,
        params=[3, 0, 0, 0, 0],
    ),
    severity_input=DistributionInput(
        dist=DistributionType.PARETO,
        params=[2, 0, 0, 0, 0],
    ),
    years_simulated=100_000,
)

duration = time.perf_counter() - start
print(f"Total Duration = {duration}")

Time to get modelyearloss: 0.7039654000000155
Time to get modelyearloss records: 5.310707600000114
Time to insert modelyearloss records: 29.902278999999908
Time to commit: 0.22154219999993074
Total Duration = 36.6916025999999


In [33]:
# Delete a historical loss file

# Input data
histlossfile_id = 3

try:
    histolossfile = session.get(HistoLossFile, histlossfile_id)
    if not histolossfile:
            raise ValueError(f"Historical loss file with ID {histlossfile_id} not found.")
    session.delete(histolossfile)
    session.commit()
    print(f"The historical loss file has been deleted.")
    
except SQLAlchemyError as e:
    session.rollback()
    print(f"Database error occurred: {e}")
    raise

except Exception as e:
    session.rollback()
    print(f"An unexpected error occured: {e}")
    raise

finally:
    session.close()

The historical loss file has been deleted.


In [25]:
fsm = session.scalars(
    select(FrequencySeverityModel)
).first()

In [26]:
fsm

FrequencySeverityModel(id=1)

In [34]:
try:
    session.delete(fsm)
    session.commit()
except Exception as e:
    session.rollback()
    print(f"An error occured: {e}")
    raise
finally:
    session.close()    

# DELETE A FREQUENCY SEVERITY MODEL, A FREQUENCY SOLO, A SEVERITY SOLO

# TRANSFORM SCRIPTS INTO FUNCTIONS

# THEN CORRECT DAYS RECORDING IN THE DATABASE

# THEN CREATE JIRA SPECIFIC ISSUES

# USE ASYNCIO??? (ASK CHATGPT)

# THEN WORKSHOP WITH ANTOINE B TO REVIEW CHANGES LIKE THOSE IN PYDANTIC FOR FREQUENCYINPUT SEVERITYINPUT ETC

# THEN DO UI FOR TRAIN_SQLA FOR  ENGINE ONLY FOR ACTUARIES