# Triage Experiment Report


In [None]:
import pandas as pd
import sqlalchemy
import os
import logging
import matplotlib.pyplot as plt

from yaml import safe_load
from sqlalchemy.engine.url import URL
from triage.util.db import create_engine
from triage.component.postmodeling.experiment_summarizer import ExperimentReport, get_most_recent_experiment_hash, load_report_parameters_from_config


# Notebook-wide pandas display settings: show 4 decimal places and lift the
# limits on column width, row count, and column count.
pd.options.display.precision = 4
pd.options.display.max_colwidth = None
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Set the default figure resolution to 120 DPI.
plt.rcParams["figure.dpi"] = 120

# Raise the root logger's threshold to CRITICAL so that lower-severity log
# messages do not make the report long.
logging.root.setLevel(logging.CRITICAL)

%matplotlib inline

## Database Connection

In [None]:
# Read the connection settings from database.yaml; the keys used below are
# host, user, pass, db and port.
with open("database.yaml", "r") as f:
    config = safe_load(f)

# SQLAlchemy 1.4+ builds URL objects through URL.create(); older releases only
# offer the plain constructor, so fall back to it when create() is absent.
_make_url = getattr(URL, "create", URL)

# Use the canonical 'postgresql' dialect name: the legacy 'postgres' alias is
# deprecated and is rejected by SQLAlchemy 1.4 and later.
db_url = _make_url(
    'postgresql',
    host=config['host'],
    username=config['user'],
    password=config['pass'],
    database=config['db'],
    port=config['port'],
)

In [None]:
# If your database credentials are stored in environment variables, uncomment the code below instead.
# db_url = URL(
#             'postgres',
#             host=os.getenv('PGHOST'),
#             username=os.getenv('PGUSER'),
#             database=os.getenv('PGDATABASE'),
#             password=os.getenv('PGPASSWORD'),
#             port=5432,
#         )

In [None]:

# Build the database engine from db_url with triage's create_engine helper;
# every report call below goes through this engine.
db_engine = create_engine(db_url)

## 1. Initializing the Report
Here we initialize the report-generation class. Before doing so, we need to set a few parameters.

#### 1.1 Default Parameters
The following values are the default parameters for the report. If you are using this interactively, you can change the parameter values.

In [None]:
# Hash of the most recent completed experiment, as returned by
# get_most_recent_experiment_hash.
# Note that this has to be a list, hence the enclosing brackets.
# To analyse different experiments, replace the function call with your own
# list of experiment hashes.
experiment_hashes = [get_most_recent_experiment_hash(db_engine)]


# Model performance metric and threshold; the default is recall@1_pct.
# Update these to use a different metric or threshold, e.g., precision@ or 100_abs.
performance_metric, threshold = 'recall@', '1_pct'

# Bias metric; the default is tpr_disparity. With no priority groups set,
# bias metric values are generated for all groups (if a bias audit is
# specified in the experiment config).
bias_metric, bias_priority_groups = 'tpr_disparity', None
### bias_priority_groups example
# bias_priority_groups = {'race': ['African American'],
#                         'gender': ['Female']}

#### 1.2 Updating the parameters based on the experiment configuration (YAML)

If you set the following parameters in the experiment config, the code block below will update the report parameters accordingly.

```yaml
    scoring:
        # Append these key-value pairs to the scoring section
        priority_metric: 'recall@'
        priority_parameter: '1_pct' 
      
    bias_audit:
        ## Append these key-value pairs to the bias_audit section (if a bias audit is performed)
        priority_metric: 'tpr_disparity'

        priority_groups:
          'race':
            - 'African American'
          'gender':
            - 'Female'
  ```

In [None]:
# Load the report parameters stored with the first experiment in the list.
params = load_report_parameters_from_config(db_engine, experiment_hashes[0])

# For each setting, keep the current value when the config supplies None and
# otherwise take the value from the config.
performance_metric = (
    performance_metric
    if params['performance_metric'] is None
    else params['performance_metric']
)

threshold = (
    threshold
    if params['threshold'] is None
    else params['threshold']
)

bias_metric = (
    bias_metric
    if params['bias_metric'] is None
    else params['bias_metric']
)

bias_priority_groups = (
    bias_priority_groups
    if params['priority_groups'] is None
    else params['priority_groups']
)

In [None]:
# Display the parameter values that will be passed to the report.
performance_metric, threshold, bias_metric, bias_priority_groups

In [None]:
# Create the report object used by every section below, from the database
# engine, the experiment hash list, and the performance/bias parameters
# chosen above.
rep = ExperimentReport(
    engine=db_engine,
    experiment_hashes=experiment_hashes,
    performance_priority_metric=performance_metric,
    threshold=threshold,
    bias_priority_metric=bias_metric,
    bias_priority_groups=bias_priority_groups
)

## 2. Experiment Summary

In [None]:
# Produce the experiment summary section of the report.
rep.generate_summary()

## 3. Visualizing the Temporal Validation Splits

In [None]:
# Produce the temporal validation splits section of the report.
rep.timesplits()

## 4. Modeling Cohorts

In [None]:
# Produce the modeling cohorts section with plots enabled; the returned
# summary is kept so the next cell can inspect it.
cohort_summary = rep.cohorts(generate_plots=True)

In [None]:
# Descriptive statistics for the 'cohort_size' and 'baserate' columns.
# NOTE(review): presumably cohort_summary is a pandas DataFrame — confirm.
cohort_summary[['cohort_size', 'baserate']].describe()

#### Cohort Subsets

In [None]:
# Produce the cohort subsets section of the report.
rep.subsets()

## 4. Predictors 

In [None]:
# Fetch the features (predictors) from the report and display them as the
# cell output.
features = rep.features()
features

#### 4.1 Missingness of Features 

In [None]:
# Produce the feature missingness section of the report.
rep.feature_missingness()

## 5. Model Groups Built

In [None]:
# Produce the model groups section of the report.
rep.model_groups()

## 6. All Models Built

In [None]:
# Produce the section covering all models built.
rep.models()

## 7. Model Performance

#### 7.1 Overall Cohort

In [None]:
# Produce the overall-cohort model performance section; the result is kept so
# the next cell can display it.
evaluations = rep.model_performance()

In [None]:
# Display the value returned by rep.model_performance().
evaluations

#### 7.2 Cohort subsets

In [None]:
# Produce the model performance section for the cohort subsets and keep the
# result.
# NOTE(review): the commented-out call below looks like a leftover from an
# earlier API — confirm whether it can be removed.
# plot_subset_performance(db_engine, experiment_hashes, parameter,metric)
subset_evaluations = rep.model_performance_subsets()

## 8. Model Performance vs Bias

In [None]:
# Produce the model performance vs. bias section and keep the result.
equity_metrics = rep.efficiency_and_equity()

## 9. Initial Model Selection and Further analysis on best models
For the purposes of this report, by default we pick the best-performing model of each model type, based on average performance, and generate additional outputs about those models. We do not assume that predictions exist at this stage, so we skip analyses such as list comparisons, crosstabs, and score distributions. Instead, we look at higher-level comparisons between the different model types.

In [None]:
# Select the best configuration for each model type.
# NOTE(review): "hp" presumably stands for hyperparameter — confirm.
rep.get_best_hp_config_for_each_model_type()