# Create a report from a file with gaps per sub-characteristic


In [1]:
from IPython.display import IFrame, display
from ml_quality.assessments import QualityAssessment
from ml_quality.assessment_automation import write_gaps
from ml_quality.constants import GAP_FILE_COLUMNS
from pathlib import Path
import pandas as pd

### Initialise a quality assessment

In [2]:
# Describe the system under assessment; no model family is specified here.
assessment = QualityAssessment(
    name="Example system 1",
    team="Example team 1",
    business_criticality="production_critical",
    mlp_name="Example model 1",
    date="2024-04-01",
    model_family=None,
)

# Load the gaps for this system from the input CSV file.
assessment.set_gaps_from_csv("./assessments/inputs/gaps_model_1.csv")

# Create the PDF report and print the path returned by create_pdf_report.
report_path = assessment.create_pdf_report(font_type="verdana")
print(report_path)

ml_quality_reports/example_team_1/example_system_1/2024-04-01/report.pdf


### Visualize report

In [3]:
# Show the report created above inline, in a 700x400 frame.
IFrame(src=report_path, width=700, height=400)

### Change assessment parameters (e.g. a gap value or the date) and regenerate the report

In [4]:
# Set the gap for "accuracy" to "large" and move the assessment to a new date.
assessment.set_gap("accuracy", "large")
assessment.date = "2024-04-03"
# Re-create the report. As the last expression of the cell, the returned path is
# echoed as the cell output. NOTE: report_path is not reassigned here, so it
# still holds the path returned by the earlier create_pdf_report call.
assessment.create_pdf_report(font_type="verdana")

'ml_quality_reports/example_team_1/example_system_1/2024-04-03/report.pdf'

### Generate another quality assessment report from a different input

In [5]:
# Start a fresh assessment for a second system; this rebinds `assessment`.
assessment = QualityAssessment(
    name="Example system 2",
    team="Example team 2",
    business_criticality="production_critical",
    mlp_name="Example model 2",
    date="2023-12-01",
    model_family=None,
)

# Load the gaps for the second system from its own input CSV file.
assessment.set_gaps_from_csv("./assessments/inputs/gaps_model_2.csv")

# Create the PDF report; `report_path` now refers to this second report.
report_path = assessment.create_pdf_report(font_type="verdana")

In [6]:
# Show the most recently created report inline, in a 700x400 frame.
IFrame(src=report_path, width=700, height=400)

# Generate technical gaps per model from ML registry data

In [7]:
# Read the example ML registry export and preview its first five rows.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which suggests
# the CSV was saved together with its index. Consider index_col=0 once it is
# confirmed that write_gaps does not rely on that column.
df_registry = pd.read_csv(filepath_or_buffer="./ml_registry_example.csv")
df_registry.head(n=5)

Unnamed: 0,model_name,archived,alias,uri,version,short_description,long_description,team_name,team_id,department_name,...,report_url,quality_score,business_criticality,maturity,expected_maturity,model_family,raq_questionnaire_filled_in,number_applications_using_the_model,model_is_of_strategic_importance,yyyy_mm_dd
0,model1,False,mt_safe_search,ml-asset:static-model/model1/0.0.1,0.0.1,My model 1 description,The model is awesome and improves KPI by 100%,teamA,60007795,departmentA,...,"""""",30,poc,0,1,,,0,1,2024-03-22
1,model2,False,flights_top_airlines,ml-asset:static-model/model2/0.0.1,0.0.1,My model 2 description,The model is awesome and improves KPI by 100%,teamB,60006424,departmentB,...,"""""",30,poc,0,1,,,0,0,2024-03-22


In [9]:
# Write one gaps CSV per registry model into `output_dir`.
output_dir = Path("gaps_from_registry")
output_dir.mkdir(parents=True, exist_ok=True)

# Group by model name so that every model is handled exactly once, even when
# the registry holds several rows for it. Iterating over the raw model_name
# column would repeat the same work and rewrite the same file once per
# duplicate row. sort=False keeps the models in registry order; rows whose
# model_name is missing are skipped by groupby.
for model_name, df_model in df_registry.groupby("model_name", sort=False):
    gap_rows = write_gaps(df_model.iterrows(), df_registry.columns)
    gaps_path = output_dir / f"gaps_{model_name}.csv"
    pd.DataFrame(gap_rows, columns=GAP_FILE_COLUMNS).to_csv(gaps_path)

Index(['sub_characteristic', 'gap_value', 'url', 'reasoning', 'model_name',
       'team_name', 'business_criticality', 'model_family'],
      dtype='object')
Index(['sub_characteristic', 'gap_value', 'url', 'reasoning', 'model_name',
       'team_name', 'business_criticality', 'model_family'],
      dtype='object')
