# FMD Scorecard KPIs

## Setup

### Import packages

In [1]:
# workhorse modules
import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import re
from pathlib import Path
import datadotworld as dw

# local utility functions
from utils import *

### Set pandas options
This makes Pandas print all rows and columns to the output when requested.

In [2]:
# apply the notebook's pandas display settings (helper is not defined in this
# notebook; presumably provided by the star import from utils)
set_pd_params()

# turn off pandas' chained-assignment warning (pandas default is 'warn')
pd.set_option("mode.chained_assignment", None)

### Import the data
Data is a copy of Archibus's `wrhwr` table, with some irrelevant columns left out. 

In [None]:
# Run a SQL query against the data.world dataset "dgsbpio/auditfinding3" and
# keep the result as a dataframe. The query selects every column of the
# "wrhwr_10072020" table.
wr_raw = dw.query(
    dataset_key="dgsbpio/auditfinding3", query="select * from wrhwr_10072020"
).dataframe

# report the raw row count as a baseline for the cleaning steps that follow
print(f"The work orders dataframe has {wr_raw.shape[0]:,} rows.")

## Data cleaning

### Basic cleaning
Strips whitespace from strings to make matching easier, and renames a few columns.

In [None]:
# apply the tidy up function
# NOTE(review): tidy_up_wr is not defined in this notebook; presumably it comes
# from the star import of the local utils module -- confirm there for the exact
# cleaning steps.
wr_tidy = tidy_up_wr(wr_raw)

# report the row count after tidying so it can be compared with the raw count
print(f"The tidied work orders dataframe has {wr_tidy.shape[0]:,} rows.")

### Remove duplicate work orders
Removes rows where the technician's description says the work request (WR) is a duplicate and the status is "Canceled", "Closed", or "Rejected".

In [None]:
# drop the work requests flagged as duplicates
# NOTE(review): drop_dupes is not defined in this notebook; presumably it comes
# from the star import of the local utils module -- confirm the matching rules there.
wr_deduped = drop_dupes(wr_tidy)

# report the remaining row count and how many rows the dedupe step removed
print(f"The deduped work orders dataframe has {wr_deduped.shape[0]:,} rows.")
print(
    f"Removing duplicates has cut {wr_tidy.shape[0] - wr_deduped.shape[0]:,} rows from the work orders dataframe."
)

### Combine date and time columns to get timestamps
This takes the date from a date column and the time from a time column and combines them into a single timestamp.

This transformation allows us to know the time to completion with greater precision. 

In [None]:
# glue the date and time for request
# (glue_date_time is not defined in this notebook; presumably from utils)
wr_dt = glue_date_time(wr_deduped, "date_requested", "time_requested", "requested_dt")

# glue the date and time for completion
wr_dt = glue_date_time(wr_dt, "date_completed", "time_completed", "completed_dt")

# convert "date closed" to date time (this column has no time information)
# The unit is spelled out because pandas 2.x rejects the unit-less "datetime64"
# dtype in astype; "datetime64[ns]" works on both old and new pandas.
wr_dt["date_closed"] = wr_dt["date_closed"].astype("datetime64[ns]")

# show three random rows to sanity-check the new timestamp columns
wr_dt[["wr_id", "requested_dt", "completed_dt", "date_closed"]].sample(3)

## Data preparation

### Include the fiscal year

In [None]:
# Run the fiscal-year helper over the timestamped work orders. The helper is
# not defined in this notebook (presumably from utils); later cells read a
# "fiscal_year" column from its result.
wr_fy = entirely_within_fiscal_year(wr_dt)

print(wr_fy.shape)

# show six random rows with the columns relevant to the fiscal-year assignment
wr_fy.loc[
    :, ["wr_id", "problem_type", "requested_dt", "date_closed", "fiscal_year"]
].sample(6)

### Filter to PM only

In [None]:
# Problem types treated as preventive maintenance (PM).
# Each entry appears once; the original list repeated "FUEL INSPECTION".
PM_list = [
    "HVAC|PM",
    "BUILDING|PM",
    "PREVENTIVE MAINT",
    "FUEL INSPECTION",
    "BUILDING INTERIOR INSPECTION",
    "INSPECTION",
]

# keep fiscal years 2016 through 2020 (the end of range() is exclusive)
cond_fy = wr_fy["fiscal_year"].isin(range(2016, 2021))
# isin() does exact matching, so the "|" in "HVAC|PM" is a literal character
# of the problem type, not a regex alternation
cond_pm = wr_fy["problem_type"].isin(PM_list)

# a row must satisfy both the fiscal-year and the PM condition
wr_filtered = wr_fy[cond_fy & cond_pm]

print(f"The work orders dataframe has {wr_filtered.shape[0]:,} rows.")

### Include days to completion

In [None]:
# compute_days_to_completion is not defined in this notebook (presumably from
# utils); later cells read a "days_to_completion" column from its result.
wr_durations = compute_days_to_completion(wr_filtered)

In [None]:
# spot-check six rows; the fixed seed makes the sample reproducible
wr_durations.loc[
    :, ["wr_id", "problem_type", "requested_dt", "completed_dt", "days_to_completion"]
].sample(6, random_state=444)

## Compute the median-based benchmark

In [None]:
# mask for the benchmark window: fiscal years 2015 through 2018
# (the end of range() is exclusive)
cond_test = wr_durations["fiscal_year"].isin(range(2015, 2019))

# list the fiscal years that actually occur inside the window
wr_durations.loc[cond_test, "fiscal_year"].unique()

In [None]:
# benchmark = median days to completion over the rows selected by cond_test
# (hard-coded alternative left for reference: on_time_benchmark = 9.51)
on_time_benchmark = wr_durations.loc[cond_test, "days_to_completion"].median()
print(on_time_benchmark)

def compute_is_on_time(df, benchmark):
    """Return a copy of ``df`` with a boolean ``is_on_time`` column.

    A row is on time when its ``days_to_completion`` is less than or equal
    to ``benchmark``. Rows with a missing ``days_to_completion`` compare as
    False. The frame passed in is not modified.
    """
    met_benchmark = df["days_to_completion"].le(benchmark)
    return df.assign(is_on_time=met_benchmark)
    
# flag every work order in wr_durations against the median benchmark
wr_on_time = compute_is_on_time(wr_durations, on_time_benchmark)

In [None]:
# overall share of work orders flagged on time (mean of the boolean column)
wr_on_time["is_on_time"].mean()

In [None]:
# per-fiscal-year means of the numeric and boolean columns, including the
# on-time rate; numeric_only=True is passed explicitly because pandas 2.x
# no longer drops non-numeric columns silently and raises a TypeError instead
wr_on_time.groupby("fiscal_year").mean(numeric_only=True)

In [None]:
import seaborn as sns

# histogram of days to completion, restricted to fiscal year 2020
cond_year = wr_durations["fiscal_year"].eq(2020)
plot_data = wr_durations.loc[cond_year]
sns.histplot(data=plot_data, x="days_to_completion")