In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
folder = r'C:\Users\Erik\Downloads'
util_fname = 'Utilization Tabular (12).csv'
org_fname = 'Organizations.csv'
emp_fname = 'Employees (14).csv'
first_valid_date = '2020-06-01'
last_valid_date = '2020-10-31'

In [3]:
# read in data
util_df = pd.read_csv(os.path.join(folder, util_fname), sep='\t',
                       encoding='utf_16_le')
util_df['Hours Date'] = pd.to_datetime(util_df['Hours Date'])
filt = (util_df['Hours Date'] < pd.to_datetime(last_valid_date)) & (util_df['Hours Date'] > pd.to_datetime(first_valid_date))
util_df = util_df[filt]
org_df = pd.read_csv(os.path.join(folder, org_fname), sep='\t',
                       encoding='utf_16_le')
emp_df = pd.read_csv(os.path.join(folder, emp_fname), sep='\t',
                       encoding='utf_16_le')
emp_df['Hire date'] = pd.to_datetime(emp_df['Hire date'])
emp_df['Termination date'] = pd.to_datetime(emp_df['Termination date'])

In [4]:
def join_util_to_org(util_df, org_df):
    df = pd.merge(util_df, org_df, how='left', left_on='Project ID', right_on='Project ID')
    df['Project Name'] = df['Project Name_x']
    df = df.drop(columns=['Project Name_x', 'Project Name_y'])
    
    return df

In [5]:
df = join_util_to_org(util_df, org_df)

In [6]:
months_dict = {
    'Jun': ('2020-06-01', '2020-06-30'),
    'Jul': ('2020-07-01', '2020-07-31'),
    'Aug': ('2020-08-01', '2020-08-31'),
    'Sep': ('2020-09-01', '2020-09-30'),
    'Oct': ('2020-10-01', '2020-10-31')
}

In [7]:
def calc_period_utilization(df, emp_df, month, start, end):
    """start and end as 'YYYY-MM-DD' strings, returns pd.Series"""
    start = pd.to_datetime(start)
    end = pd.to_datetime(end)
    
    # get fte hours
    fte_df = emp_df[['Employee ID', 'Hire date', 'Termination date']].copy()

    def update_start(hire_date):
        if hire_date < start:
            return start
        elif hire_date > end:
            return end
        else:
            return hire_date

    def update_end(termination_date):
        if pd.isnull(termination_date):
            return end
        if termination_date > end:
            return end
        elif termination_date < start:
            return start
        else:
            return termination_date

    fte_df['sem_start'] = fte_df['Hire date'].apply(update_start)
    fte_df['sem_end'] = fte_df['Termination date'].apply(update_end)
    fte_df['bushrs'] = np.busday_count(fte_df['sem_start'].dt.date, fte_df['sem_end'].dt.date) * 8

    fte_hrs = fte_df['bushrs'].sum()
    
    # get total hours
    filt = (df['Hours Date'] >= start) & (df['Hours Date'] <= end)
    df = df.loc[filt]

    total_hrs = df['Entered Hours'].sum()
    
    # get org total hours
    org_total_hrs = df.groupby('Organization Name').sum()['Entered Hours']
    
    # get org bill hours
    filt = df['User Defined Code 3'] == 'SRV'
    org_bill_hrs = df.loc[filt].groupby('Organization Name').sum()['Entered Hours']
    
    hrs_df = pd.merge(org_bill_hrs, org_total_hrs, left_index=True, right_index=True, suffixes=('_bill', '_total'))
    # divide org total hrs by total hours to get proportion
    hrs_df['prop_to_org'] = hrs_df['Entered Hours_total'] / total_hrs
    # weight fte by prop to org
    hrs_df['weighted_fte'] = hrs_df['prop_to_org'] * fte_hrs
    # utilization is billale hours divided by weighted fte
    hrs_df['utilization'] = hrs_df['Entered Hours_bill'] / hrs_df['weighted_fte']
    
    return pd.Series(hrs_df['utilization'], name=month)    

In [8]:
# calc for all orgs
series = []
for month, (start, end) in months_dict.items():
    result = calc_period_utilization(df, emp_df, month, start, end)
    series.append(result)
month_df = pd.DataFrame(series).transpose()
month_df

Unnamed: 0,Jun,Jul,Aug,Sep,Oct
Africa,0.535603,0.790061,0.771504,0.777482,0.719925
Comms & KM,0.662981,0.691463,0.717295,0.576408,0.693655
Global Adaptive Managemen,0.74548,0.862383,0.931112,0.915414,0.85472
Habitat,0.503051,0.684379,0.742646,0.728334,0.626902
Latin America & the Carib,0.873843,0.852291,0.862028,0.925518,0.850394
Water,0.621322,0.720952,0.482385,0.486318,0.44551
General Intl,,0.061451,0.108848,0.146275,0.089056


In [9]:
# add domestic as combination of habitat and water
filt = (df['Organization Name'] == 'Habitat') | (df['Organization Name'] == 'Water')
df.loc[filt, 'Organization Name'] = 'Domestic'

In [10]:
series = []
for month, (start, end) in months_dict.items():
    result = calc_period_utilization(df, emp_df, month, start, end)
    series.append(result)
dom_df = pd.DataFrame(series).transpose()
dom_df

Unnamed: 0,Jun,Jul,Aug,Sep,Oct
Africa,0.535603,0.790061,0.771504,0.777482,0.719925
Comms & KM,0.662981,0.691463,0.717295,0.576408,0.693655
Domestic,0.559989,0.701903,0.61019,0.613268,0.549913
Global Adaptive Managemen,0.74548,0.862383,0.931112,0.915414,0.85472
Latin America & the Carib,0.873843,0.852291,0.862028,0.925518,0.850394
General Intl,,0.061451,0.108848,0.146275,0.089056


In [11]:
pd.DataFrame(dom_df.loc['Domestic', :]).transpose()

Unnamed: 0,Jun,Jul,Aug,Sep,Oct
Domestic,0.559989,0.701903,0.61019,0.613268,0.549913


In [12]:
final_df = pd.concat([month_df, pd.DataFrame(dom_df.loc['Domestic', :]).transpose()])
final_df

Unnamed: 0,Jun,Jul,Aug,Sep,Oct
Africa,0.535603,0.790061,0.771504,0.777482,0.719925
Comms & KM,0.662981,0.691463,0.717295,0.576408,0.693655
Global Adaptive Managemen,0.74548,0.862383,0.931112,0.915414,0.85472
Habitat,0.503051,0.684379,0.742646,0.728334,0.626902
Latin America & the Carib,0.873843,0.852291,0.862028,0.925518,0.850394
Water,0.621322,0.720952,0.482385,0.486318,0.44551
General Intl,,0.061451,0.108848,0.146275,0.089056
Domestic,0.559989,0.701903,0.61019,0.613268,0.549913
