In [57]:
from collections import defaultdict
import glob
import numpy as np
import os
import pandas as pd
import shutil

# Create Maintenance Features for each BUNO 

## First make temporary files by BUNO with raw maintenance records

In [58]:
min_year = 2009
max_year = 2016

maint_data_dir = 'maint_data'
temp_maint_dir = 'buno-maint-files-temp'
bunos = np.load('miscellaneous/bunos.npy')

all_fnames = list(glob.iglob(os.path.join(maint_data_dir, '*.csv')))

if os.path.exists(temp_maint_dir):
    shutil.rmtree(temp_maint_dir)

os.mkdir(temp_maint_dir)
    
written = defaultdict(lambda: False)

print('Writing temporary maintenance record files...')

for year in range(min_year, max_year + 1):
    fnames = [fname for fname in all_fnames if str(year) in fname]
    dfs = [pd.read_csv(open(fname, 'r', errors='ignore'), dtype=str) for fname in fnames]

    if not len(dfs):
        continue
    
    year_df = pd.concat(dfs)

    for i, buno in enumerate(bunos):
        print(f'Processing year {year}, BUNO {i + 1}/{bunos.size}...', end='\r')
        
        buno_fp = os.path.join(temp_maint_dir, f'{buno}-maint-history.csv')
        buno_file = open(buno_fp, 'a')
        buno_data = year_df[year_df['Bu/SerNo'] == str(buno)]
        
        if len(buno_data) > 0:
            write_header = True if not written[buno] else False
            buno_data.to_csv(buno_file, index=False, header=write_header)
            written[buno] = True

        if not written[buno]:
            os.remove(buno_fp)

print('\nDone!')

Writing temporary maintenance record files...
Processing year 2016, BUNO 663/663...
Done!


## Create maintenance feature files

In [198]:
print('Creating maintenance feature files...')

at_codes = ['t', 'c', 'J', 'L', '6', 'F', 'B', ‘ ‘, 'D', 'T']

maint_feat_dir = 'buno-maint-feat'
cum_fh_dir = 'buno-cum-fh-files'

if not os.path.exists(maint_feat_dir):
    os.mkdir(maint_feat_dir)
    
fnames = list(glob.iglob(os.path.join(temp_maint_dir, '*.csv')))

for i, fname in enumerate(fnames):
    print(f'Processing BUNO {i + 1}/{len(fnames)}', end='\r')
    
    buno = fname.split('/')[-1].split('.')[0].split('-')[0]
    
    maint_df = pd.read_csv(fname)
    maint_df['Comp Date'] = pd.to_datetime(maint_df['Comp Date'])
    maint_df = maint_df.rename(columns={'Bu/SerNo': 'buno'})
    maint_df = maint_df.sort_values('Comp Date').reset_index()
    
    feat_df = pd.DataFrame()
    feat_df['Date'] = maint_df['Comp Date']
    
    # Get the Action Taken onehot features
    at_df = maint_df['Action Taken']
    maint_df['Action Taken'][~maint_df['Action Taken'].isin(action_codes)] = 'NULL'
    
    if 'NULL' in at_feat.columns:
        at_feat = pd.get_dummies(at_df).drop('NULL', axis=1)
    
    feat_df = pd.concat([feat_df, at_feat], axis=1)
    
    # Not all files have every code in them. So add columns for the missing ones
    missing_codes = [code for code in at_codes if code not in feat_df.columns]
    for code in missing_codes:
        feat_df[code] = 0
        
    # Merge maintenance events that occur on the same date
    agg_funs = dict([(col, 'max') for col in at_codes])
    feat_df = feat_df.groupby('Date').agg(agg_funs)
        
    cum_fh_fp = os.path.join(cum_fh_dir, f'{buno}-cum-fh.csv')

    # Merge in cumulative flight hours
    feat_df = feat_df.merge(cum_fh_df, on='Date').drop('Buno', axis=1)
    
    feat_df.to_csv(os.path.join(maint_feat_dir, f'{buno}-maint-feat.csv'), index=False)
    
print('\nDone!\n')

shutil.rmtree(temp_maint_dir)

Creating maintenance feature files...
Processing BUNO 1/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 56/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 100/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 139/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 142/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 147/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 179/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 203/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 225/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 230/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 330/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 376/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 463/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 475/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 528/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 580/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 583/596

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Processing BUNO 596/596
Done!



# Create files that indicate whether a plane was damaged after a mission

These are the training labels

In [52]:
print('Creating breakage label files...')

mission_data_dir = 'sharp_data'
mission_feat_dir = 'buno_files/buno_mission'
label_dir = 'buno-breakages'

if not os.path.exists(training_set_dir):
    os.mkdir(training_set_dir)
    
if not os.path.exists(label_dir):
    os.mkdir(label_dir)
    
fnames = list(glob.iglob(os.path.join(mission_feat_dir, '*.csv')))

for i, fname in enumerate(fnames):
    print(f'Processing BUNO {i + 1}/{len(fnames)}', end='\r')
    
    buno = fname.split('/')[-1].split('.')[0].split('-')[0]
    df = pd.read_csv(fname)
    
    write_fname = os.path.join(label_dir, f'{buno}-breakages.csv')
    
    damaged_df = df[['Buno', 'LaunchDate', 'dam']]
    damaged_df.to_csv(write_fname, index=False)

print('\nDone.')

Creating breakage label files...
Processing BUNO 540/540
Done.
