In [12]:
# workhorse modules
import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import re
from pathlib import Path
import seaborn as sns
import datadotworld as dw
import matplotlib.pyplot as plt

# local utility functions
from utils import (
    add_cm_benchmarks,
    add_fiscal_year,
    set_pd_params,
    tidy_up_df,
    cast_dtypes,
    glue_date_time,
    compute_days_to_completion,
    compute_days_open,
    consolidate_prob_types,
    compute_pm_cm,
    compute_pm_cm_by_month,
    compute_kpi_table,
    compute_kpi_table_by_month,
)

from vis_utils import set_plot_params, pointplot_with_barplot

In [13]:
# Apply the project's pandas display settings and plotting defaults (helpers
# imported from utils / vis_utils).
set_pd_params()
set_plot_params()
# Silence pandas' chained-assignment warning for the whole notebook.
# NOTE(review): this also hides genuine view-vs-copy mistakes further down.
pd.set_option("mode.chained_assignment", None)  # default='warn'

In [14]:
# Read the raw benchmarks extract from ./data (relative to the current working
# directory), asking pandas to parse the three date columns.
# NOTE(review): the .info() output below still reports date_requested and
# date_closed as `object`, so parse_dates did not convert them -- confirm that
# the downstream tidying step handles these columns.
date_columns = ["date_completed", "date_requested", "date_closed"]
kpis_path = Path.cwd().joinpath("data", "dash_benchmarks_20210426.csv")
kpis_raw = pd.read_csv(kpis_path, parse_dates=date_columns)
kpis_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107523 entries, 0 to 107522
Data columns (total 19 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   wr_id                   107523 non-null  int64         
 1   status                  107523 non-null  object        
 2   description             98908 non-null   object        
 3   supervisor              84843 non-null   object        
 4   date_completed          97550 non-null   datetime64[ns]
 5   date_requested          107523 non-null  object        
 6   date_closed             97312 non-null   object        
 7   fy_request              107523 non-null  int64         
 8   role_name               100087 non-null  object        
 9   building_name           107172 non-null  object        
 10  b_number                107172 non-null  object        
 11  primary_type            107523 non-null  object        
 12  problem_type            107523

In [15]:
# Run the raw frame through the project's tidy_up_df helper (imported from utils).
# NOTE(review): the helper's exact cleaning rules are not visible in this notebook.
wr_tidy = tidy_up_df(kpis_raw)

# Report the size of the tidied frame and how many rows the tidying step dropped.
print(f"The tidied work orders dataframe has {len(wr_tidy):,} rows.")
print(f"By tidying the data, we have removed {len(kpis_raw) - len(wr_tidy):,} rows.")

The tidied work orders dataframe has 107,396 rows.
By tidying the data, we have removed 127 rows.


In [28]:
# Derive integer year / month columns from the request-month label, then build
# a first-of-month timestamp from them.
# Assumes calendar_month_request is a string starting with a 4-digit year and
# ending with a 2-digit month (e.g. "YYYY-MM") -- TODO confirm.
request_month = wr_tidy["calendar_month_request"]
wr_tidy["year"] = request_month.str[:4].astype(int)
wr_tidy["month"] = request_month.str[-2:].astype(int)

# pd.to_datetime assembles a date from year/month/day columns; the day is
# pinned to 1 so every row maps to the start of its request month.
year_month_day = wr_tidy[["year", "month"]].assign(DAY=1)
wr_tidy["month_start"] = pd.to_datetime(year_month_day)

In [29]:
# Count non-null wr_id values for each (primary_type, PM_type) combination
# across the full tidied frame.
pm_group_keys = ["primary_type", "PM_type"]
wr_tidy.groupby(pm_group_keys)[["wr_id"]].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,wr_id
primary_type,PM_type,Unnamed: 2_level_1
PREVENTIVE_GENERAL,BASEMENT INSPECT,1214
PREVENTIVE_GENERAL,BLDG INSPECTION,1970
PREVENTIVE_GENERAL,ELEVATOR TEST,1222
PREVENTIVE_GENERAL,EXTERMINATION,12
PREVENTIVE_GENERAL,FLOOR BUFFING,99
PREVENTIVE_GENERAL,FUEL TANK TEST,597
PREVENTIVE_GENERAL,GENERATOR TEST,1181
PREVENTIVE_GENERAL,KITCHEN PM,20
PREVENTIVE_HVAC,HEAT CHECK TEST,308
PREVENTIVE_HVAC,HEATING LEVELS,754


In [30]:
# Keep only work requests whose request month starts on or after the cutoff.
# The cutoff is a named Timestamp so the datetime comparison is explicit and
# the threshold is easy to find and change.
RECENT_CUTOFF = pd.Timestamp("2018-01-01")
cond_recent = wr_tidy["month_start"] >= RECENT_CUTOFF

# .copy() makes wr_recent an independent frame. Chained-assignment warnings are
# silenced in this notebook's setup cell, so without the copy a later column
# assignment on wr_recent would be ambiguous and go unreported.
wr_recent = wr_tidy.loc[cond_recent].copy()

In [36]:
# Same (primary_type, PM_type) tally as for the full frame, restricted to the
# recent subset: counts non-null wr_id values per combination.
recent_group_keys = ["primary_type", "PM_type"]
wr_recent.groupby(recent_group_keys)[["wr_id"]].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,wr_id
primary_type,PM_type,Unnamed: 2_level_1
PREVENTIVE_GENERAL,BASEMENT INSPECT,1161
PREVENTIVE_GENERAL,BLDG INSPECTION,1178
PREVENTIVE_GENERAL,ELEVATOR TEST,702
PREVENTIVE_GENERAL,EXTERMINATION,9
PREVENTIVE_GENERAL,FLOOR BUFFING,96
PREVENTIVE_GENERAL,FUEL TANK TEST,370
PREVENTIVE_GENERAL,GENERATOR TEST,718
PREVENTIVE_GENERAL,KITCHEN PM,12
PREVENTIVE_HVAC,HEAT CHECK TEST,308
PREVENTIVE_HVAC,HEATING LEVELS,744
