**Reducing Commercial Aviation Fatalities**

*Qingtian Zhou*

I'll first take a look at the data, and then engineer some features so that I can fit a Keras model to it.

In [None]:
# Set preliminaries
import numpy as np
import pandas as pd
from keras.utils import to_categorical

In [None]:
# Load the training data from the input directory (path is relative to the notebook)
TRAIN_CSV = '../input/train.csv'
my_train = pd.read_csv(TRAIN_CSV)

In [None]:
# Preview the first 10 rows of the training data
my_train.head(10)

In [None]:
# Build one identifier per person: crew * 100 + seat
my_train['crew_seat'] = my_train['crew'] * 100 + my_train['seat']
# Sorted array of the distinct ids, computed once and reused for both messages
person_ids = my_train['crew_seat'].sort_values().unique()
print(f'Unique persons: {len(person_ids)}')
print(f'Unique person ids: {person_ids}')

Given a crew_seat, I want to manipulate the features using the following steps:

0. Encode the categorical variables as numeric codes.
1. Assign a time step t = 0, 1, 2, ... to each time stamp.
2. Get the partner's features, i.e. the same features at the same t for the other seat of the same crew (seat = |seat - 1|).
3. Compute D.feature for each feature in the self and partner feature sets, where D.feature = feature at t - feature at t-1.

In [None]:
# Inspect the dtype of every column
my_train.dtypes

In [None]:
# Step 0: overwrite each categorical column with its category codes, stored as float64
for col in ('experiment', 'event'):
    my_train[col] = my_train[col].astype('category').cat.codes.astype('float64')

In [None]:
# Count rows for every (experiment, event) combination
pd.crosstab(my_train['experiment'], my_train['event'])

Not sure how to use the information in "experiment". I'll just ignore it.

In [None]:
# Need a general model fitted to each specific person: subset data to each unique crew_seat.
# Result: my_train_expanded maps crew_seat -> frame holding the person's own columns
# (`*_self`), the partner's columns at the same time step (`*_partner`) and the
# first differences of every column (`D.*`).
all_crew_seat = my_train.crew_seat.sort_values().unique()
my_train_expanded = {}


def _with_time_step(frame):
    """Return a time-sorted copy of `frame` with a column `t` = 0, 1, 2, ... in that order.

    Sorting first and numbering afterwards is what ties `t` to `time`. Assigning
    the index of a sorted copy back onto the unsorted frame would only number the
    rows in their original order. Working on the sorted copy also avoids writing
    into a slice of `my_train` (SettingWithCopyWarning).
    """
    # mergesort is stable, so rows with equal `time` keep their original relative order.
    # NOTE(review): if `time` restarts for each experiment, sorting by
    # ['experiment', 'time'] may be what is wanted here - confirm against the data.
    ordered = frame.sort_values(by=['time'], kind='mergesort').reset_index(drop=True)
    ordered['t'] = ordered.index
    return ordered


# Loop
for x in all_crew_seat:
    # crew_seat was built as crew * 100 + seat, so integer division recovers the crew
    y = x // 100
    # Step 1
    my_train_self = _with_time_step(my_train[my_train.crew_seat == x])
    my_train_partner = _with_time_step(my_train[(my_train.crew_seat != x) & (my_train.crew == y)])
    # Step 2: a left join keeps every row of the person; time steps without a
    # partner row get NaN partner columns and are removed by dropna() below
    my_train_temp_0 = my_train_self.merge(my_train_partner, how='left', on=['crew', 't'], suffixes=('_self', '_partner'))
    my_train_temp_0 = my_train_temp_0.sort_values(by=['t'])
    # Step 3: difference between consecutive time steps for every column
    my_train_temp_1 = my_train_temp_0.diff()
    my_train_temp_1 = my_train_temp_1.add_prefix('D.')
    # Bind columns; dropna() also removes the first row, whose differences are undefined
    my_train_expanded[x] = pd.concat([my_train_temp_0.reset_index(drop=True), my_train_temp_1.reset_index(drop=True)], axis=1).dropna()
# Check
print(my_train_expanded.keys())

In [None]:
# Define features and targets to be fed into a Keras model.
# The feature list is generated from the base signal names instead of being typed out.
# Column order (it fixes the column order of every X_train array):
#   time_self, <signal>_self..., <signal>_partner..., D.<signal>_self..., D.<signal>_partner...
signal_cols = [
    'eeg_fp1', 'eeg_f7', 'eeg_f8', 'eeg_t4', 'eeg_t6', 'eeg_t5', 'eeg_t3', 'eeg_fp2',
    'eeg_o1', 'eeg_p3', 'eeg_pz', 'eeg_f3', 'eeg_fz', 'eeg_f4', 'eeg_c4', 'eeg_p4',
    'eeg_poz', 'eeg_c3', 'eeg_cz', 'eeg_o2', 'ecg', 'r', 'gsr',
]
feature_cols = (
    ['time_self']
    + [s + '_self' for s in signal_cols]
    + [s + '_partner' for s in signal_cols]
    + ['D.' + s + '_self' for s in signal_cols]
    + ['D.' + s + '_partner' for s in signal_cols]
)
target_col = ['event_self']
# Use one shared one-hot width for every person. Without num_classes, to_categorical
# infers the width from the largest code present in each array, so a person who
# never shows the highest event code would get a narrower y than the others.
n_event_classes = int(my_train['event'].max()) + 1
X_train = {}
y_train = {}
for x in all_crew_seat:
    X_train[x] = my_train_expanded[x].loc[:, feature_cols].values
    y_train[x] = to_categorical(my_train_expanded[x].loc[:, target_col].values, num_classes=n_event_classes)
# Check
print(X_train.keys())
print(y_train.keys())
for x in all_crew_seat:
    print("crew_seat = " + str(x) + ": " + str(X_train[x].shape) + str(y_train[x].shape))