## Some Injury Rate Calculations That We Probably Won't Use

And now delinquent vs. nondelinquent injury rates by year and mine:

In [1]:
def get_del_injury_rate(inj_data, hrs_data, rate_type):
    """Build a mine-by-year table of hours, injury counts and injury rates.

    Injury records are counted per mine and calendar year, split by the
    value of the ``delinquent`` column, and left-joined onto the summed
    annual hours so every mine/year with reported hours is kept.

    Args:
        inj_data: Injury records with ``mine_id``, ``cal_yr``,
            ``document_no`` and ``delinquent`` columns.
        hrs_data: Hours records with ``mine_id``, ``calendar_yr`` and
            ``annual_hrs`` columns.
        rate_type: Not used by this function; kept so the signature
            matches the other rate helpers.

    Returns:
        DataFrame with one row per mine/year containing ``annual_hrs``,
        ``non_inj``, ``del_inj``, ``earliest_date``, ``earliest_year``,
        ``rate_type`` and ``inj_rate``.
    """
    keys = ['mine_id', 'cal_yr']

    # One count column per distinct `delinquent` value (0 and 1 are renamed
    # below). `my_flatten_cols` is a DataFrame helper defined outside this
    # section -- presumably it flattens the pivoted column labels; confirm.
    injury_counts = inj_data.pivot_table(
        values='document_no',
        index=keys,
        columns=['delinquent'],
        aggfunc='count',
    ).my_flatten_cols()

    # Total hours per mine/year, with the year column renamed to match the
    # injury table's key.
    yearly_hours = (
        hrs_data.groupby(['mine_id', 'calendar_yr'], as_index=False)['annual_hrs']
        .sum()
        .rename(index=str, columns={'calendar_yr': 'cal_yr'})
    )

    # Left join: mine/years without any injury rows keep their hours and get
    # missing counts.
    rates = yearly_hours.merge(
        injury_counts, how='left', on=keys, suffixes=('_m', '_inj')
    ).rename(index=str, columns={0: 'non_inj', 1: 'del_inj'})

    # `earliest_series`, `find_rate_type` and `calc_del_injury_rate` come from
    # elsewhere in the notebook; the row-wise helpers read the columns
    # prepared here.
    rates['earliest_date'] = rates['mine_id'].map(earliest_series)
    rates['earliest_year'] = pd.DatetimeIndex(rates['earliest_date']).year + 1
    rates['rate_type'] = rates.apply(find_rate_type, axis=1)
    rates['inj_rate'] = rates.apply(calc_del_injury_rate, axis=1)

    return rates

Let's test that function:

In [None]:
# Spot check: print ten randomly sampled mine/year rows.
print(
    get_del_injury_rate(inj_data, hrs_data, rate_type='All mines').sample(n=10)
)

The function below is similar to the one above, but it aggregates across all years for each mine: hours and injury counts are summed, and the yearly injury rates are averaged.

In [None]:
def get_agg_injury_rate(inj_data, hrs_data, rate_type):
    """Aggregate hours, injury counts and injury rates to one row per mine.

    Injuries are counted and hours summed per mine and calendar year, a
    yearly rate is computed for each mine/year, and the result is rolled up
    per mine: hours and injury counts are summed, yearly rates are averaged.

    Args:
        inj_data: Injury records with ``mine_id``, ``cal_yr`` and
            ``document_no`` columns.
        hrs_data: Hours records with ``mine_id``, ``calendar_yr`` and
            ``annual_hrs`` columns.
        rate_type: Not used by this function; kept so the signature
            matches the other rate helpers.

    Returns:
        DataFrame with columns ``mine_id``, ``annual_hrs``, ``inj_cnt`` and
        ``inj_rate``.
    """
    merge_keys = ['mine_id', 'cal_yr']

    injuries = (
        inj_data.groupby(merge_keys, as_index=False)['document_no']
        .count()
        .rename(index=str, columns={'document_no': 'inj_cnt'})
    )
    hours = (
        hrs_data.groupby(['mine_id', 'calendar_yr'], as_index=False)['annual_hrs']
        .sum()
        .rename(index=str, columns={'calendar_yr': 'cal_yr'})
    )

    # Left join keeps every mine/year that reported hours.
    # NOTE(review): mine/years without injury rows get a missing `inj_cnt`;
    # whether their `inj_rate` is then missing too (and so skipped by the
    # mean below) depends on `calc_injury_rate`, defined elsewhere -- confirm.
    mine_years = hours.merge(
        injuries, how='left', on=merge_keys, suffixes=('_m', '_inj')
    )
    mine_years['inj_rate'] = mine_years.apply(calc_injury_rate, axis=1)

    per_mine = mine_years.groupby('mine_id').agg(
        {'annual_hrs': 'sum', 'inj_cnt': 'sum', 'inj_rate': 'mean'}
    )
    return per_mine.reset_index()

In [None]:
# Show the per-mine aggregates, mines with the most injuries first.
print(
    get_agg_injury_rate(inj_data, hrs_data, rate_type='All mines').sort_values(
        by='inj_cnt', ascending=False
    )
)

In [None]:
def get_agg_del_injury_rate(inj_data, hrs_data, rate_type):
    """Summarise hours, injuries and injury rates per mine, split by rate type.

    The mine/year table is built with the same steps as
    ``get_del_injury_rate`` and then pivoted so each mine has one row with
    separate columns for the 'Delinquent', 'Became delinquent' and
    'Non-delinquent' rate types. The mine's overall average rate from
    ``get_agg_injury_rate`` is joined on for comparison.

    Args:
        inj_data: Injury records with ``mine_id``, ``cal_yr``,
            ``document_no`` and ``delinquent`` columns.
        hrs_data: Hours records with ``mine_id``, ``calendar_yr`` and
            ``annual_hrs`` columns.
        rate_type: Not used by this function; the overall rates are always
            requested with 'All mines'.

    Returns:
        DataFrame with columns ``mine_id``, ``inj_rate``, ``inj_rate_del``,
        ``inj_rate_became``, ``inj_rate_non``, ``inj_del``, ``inj_became``,
        ``inj_non``, ``hrs_del``, ``hrs_b`` and ``hrs_non``.

    Raises:
        KeyError: If any of the three rate types never occurs, because its
            pivoted columns are then absent.
    """
    # Injury counts per mine/year, one column per `delinquent` value.
    # `my_flatten_cols` is a DataFrame helper defined outside this section.
    inj_mine_year = pd.pivot_table(inj_data, index=['mine_id', 'cal_yr'], values='document_no',
                                   columns=['delinquent'], aggfunc='count').my_flatten_cols()

    hrs_mine_year = hrs_data.groupby(['mine_id', 'calendar_yr'], as_index=False)['annual_hrs'].sum()
    hrs_mine_year = hrs_mine_year.rename(index=str, columns={'calendar_yr': 'cal_yr'})

    # Left join keeps every mine/year that reported hours.
    hrs_inj_mine_year = pd.merge(hrs_mine_year, inj_mine_year, how='left',
                                 on=['mine_id', 'cal_yr'], suffixes=('_m', '_inj'))
    hrs_inj_mine_year = hrs_inj_mine_year.rename(index=str, columns={0: 'non_inj', 1: 'del_inj'})

    # `earliest_series`, `find_rate_type` and `calc_del_injury_rate` are
    # defined elsewhere in the notebook.
    hrs_inj_mine_year['earliest_date'] = hrs_inj_mine_year['mine_id'].map(earliest_series)
    hrs_inj_mine_year['earliest_year'] = pd.DatetimeIndex(hrs_inj_mine_year['earliest_date']).year + 1
    hrs_inj_mine_year['rate_type'] = hrs_inj_mine_year.apply(find_rate_type, axis=1)
    hrs_inj_mine_year['inj_rate'] = hrs_inj_mine_year.apply(calc_del_injury_rate, axis=1)

    # Fix: the frame used to be passed again as the first positional argument,
    # which DataFrame.pivot_table treats as `values`. That only worked because
    # iterating a DataFrame yields its column labels. The aggfunc dict already
    # selects the columns to aggregate, so no `values` argument is needed.
    by_del_type = hrs_inj_mine_year.pivot_table(index='mine_id', columns='rate_type',
                                                aggfunc={'annual_hrs': 'sum',
                                                         'non_inj': 'sum',
                                                         'del_inj': 'sum',
                                                         'inj_rate': 'mean'}).reset_index()

    # Flatten the (measure, rate_type) column pairs into single labels such
    # as 'inj_rate Delinquent'; 'mine_id' has an empty second level, hence
    # the strip().
    by_del_type.columns = [' '.join(col).strip() for col in by_del_type.columns.values]

    # Injuries in 'Became delinquent' years, whichever `delinquent` value the
    # individual records carry.
    by_del_type['inj_became'] = (by_del_type['del_inj Became delinquent']
                                 + by_del_type['non_inj Became delinquent'])

    # Overall per-mine average rate, for comparison with the split rates.
    mine_inj_rates = get_agg_injury_rate(inj_data, hrs_data, 'All mines')
    mine_inj_rates = mine_inj_rates[['mine_id', 'inj_rate']]

    by_del_type = pd.merge(by_del_type, mine_inj_rates, how='left', on='mine_id', suffixes=('_1', '_2'))

    # Keep only the reported columns, in presentation order, then shorten
    # their names.
    by_del_type = by_del_type[['mine_id', 'inj_rate',
                               'inj_rate Delinquent', 'inj_rate Became delinquent', 'inj_rate Non-delinquent',
                               'del_inj Delinquent', 'inj_became', 'non_inj Non-delinquent',
                               'annual_hrs Delinquent', 'annual_hrs Became delinquent',
                               'annual_hrs Non-delinquent']]
    by_del_type = by_del_type.rename(index=str, columns={'annual_hrs Delinquent': 'hrs_del',
                                                         'annual_hrs Became delinquent': 'hrs_b',
                                                         'annual_hrs Non-delinquent': 'hrs_non',
                                                         'del_inj Delinquent': 'inj_del',
                                                         'non_inj Non-delinquent': 'inj_non',
                                                         'inj_rate Delinquent': 'inj_rate_del',
                                                         'inj_rate Became delinquent': 'inj_rate_became',
                                                         'inj_rate Non-delinquent': 'inj_rate_non'})

    return by_del_type

# Spot check: print twenty randomly sampled per-mine rows.
print(
    get_agg_del_injury_rate(inj_data, hrs_data, rate_type='All mines').sample(n=20)
)

### All Accidents

In [None]:
# Disabled: mine/year-level variant of the export below. These lines are
# commented out and do not run.
#all_mineyear_inj_rates = get_del_injury_rate(inj_data, hrs_data, 'All mines')
#all_mineyear_inj_rates_del = combine_del_mine_data(all_mineyear_inj_rates)
#all_mineyear_inj_rates_del.to_csv(data_dir + 'analysis/msha_INJURIES_MINE_YEAR_delinquency.csv')

# Per-mine injury rates split by rate type, passed through
# combine_del_mine_data (defined outside this section) and written to CSV.
# The path is built by plain string concatenation, so this assumes data_dir
# already ends with a path separator -- confirm where data_dir is defined.
all_mine_inj_rates = get_agg_del_injury_rate(inj_data, hrs_data, 'All mines')
all_mine_inj_rates_del = combine_del_mine_data(all_mine_inj_rates)
all_mine_inj_rates_del.to_csv(data_dir + 'analysis/msha_INJURIES_MINE_delinquency.csv')

### Justice Accidents
Using the `get_agg_del_injury_rate()` function, we're going to keep only the accidents that happened at mines where one of the Justice controllers was the controller at the time of the accident.

We will then join that data to current mine information to see who currently owns the mine so we don't make any inappropriate inferences. We will also join on current status and current status date. 

So the correct way to talk about these data will be: In years when the Justice family was acting as controller over the mine, it experienced a delinquent injury rate of xxx and a nondelinquent injury rate of xxx. They may not currently be controller at this mine.

### Aggregate injury rates by mine