In [None]:
import pandas as pd
import numpy as np
# Directory holding the competition CSV files. Defined once so the three reads
# below cannot drift apart if the data is relocated.
INPUT_DIR = "../input/reducing-commercial-aviation-fatalities"

# Load the sample submission plus the test and train tables.
sample_submission = pd.read_csv(INPUT_DIR + "/sample_submission.csv")
test = pd.read_csv(INPUT_DIR + "/test.csv")
train = pd.read_csv(INPUT_DIR + "/train.csv")

In [None]:
# Preview the first five rows of the test table.
test.head(n=5)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

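**The `event` column uses the following labels:**

* **A = baseline**
* **B = SS (Startle/Surprise)**
* **C = CA (Channelized Attention)**
* **D = DA (Diverted Attention)**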

**The pilots experienced distractions intended to induce one of the following three cognitive states:**

* **Channelized Attention (CA)** is, roughly speaking, the state of being focused on one task to the exclusion of all others. This is induced in benchmarking by having the subjects play an engaging puzzle-based video game.
* **Diverted Attention (DA)** is the state of having one’s attention diverted by actions or thought processes associated with a decision. This is induced by having the subjects perform a display monitoring task. Periodically, a math problem showed up which had to be solved before returning to the monitoring task.
* **Startle/Surprise (SS)** is induced by having the subjects watch movie clips with jump scares.

# EDA

In [None]:
# Pull the `time` column out of each split (train first, then test).
train_time, test_time = (frame['time'] for frame in (train, test))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Overlay the distribution of the `time` column for the train and test splits.
plt.figure(figsize=(15, 10))
for time_series, legend_label in ((train_time, "train time"), (test_time, "test time")):
    sns.distplot(time_series, label=legend_label)
plt.xlabel("Time (s)")
plt.legend()
plt.show()

In [None]:
# Compare the ECG density curves (histogram disabled) of the train and test splits.
plt.figure(figsize=(15, 10))
for split_label, split_frame in (("train ecg", train), ("test ecg", test)):
    sns.distplot(split_frame['ecg'], label=split_label, hist=False)
plt.legend()
plt.xlabel("ECG")
# Use plt.show() to render this figure; plt.figure() here would open a second,
# empty figure instead.
plt.show()

In [None]:
# Partition the training rows by their `event` label (A, B, C, D).
train_A, train_B, train_C, train_D = (
    train[train['event'] == event_code] for event_code in ('A', 'B', 'C', 'D')
)

In [None]:
# ECG density curve (histogram disabled) for each event class in the training set.
plt.figure(figsize=(15, 10))
for event_code, event_rows in (("A", train_A), ("B", train_B), ("C", train_C), ("D", train_D)):
    sns.distplot(event_rows['ecg'], label="train_" + event_code + " ecg", hist=False)
plt.legend()
plt.xlabel("ECG")
# Use plt.show() to render this figure; plt.figure() here would open a second,
# empty figure instead.
plt.show()

In [None]:
# GSR distribution for each event class in the training set.
plt.figure(figsize=(15, 10))
for event_code, event_rows in (("A", train_A), ("B", train_B), ("C", train_C), ("D", train_D)):
    sns.distplot(event_rows['gsr'], label="train_" + event_code + " gsr")
plt.legend()
plt.xlabel("GSR")
# Use plt.show() to render this figure; plt.figure() here would open a second,
# empty figure instead.
plt.show()

In [None]:
# Distribution of the `r` (respiration) column for each event class in the training set.
plt.figure(figsize=(15, 10))
for event_code, event_rows in (("A", train_A), ("B", train_B), ("C", train_C), ("D", train_D)):
    sns.distplot(event_rows['r'], label="train_" + event_code + " r")
plt.legend()
plt.xlabel("Respiration")
# Use plt.show() to render this figure; plt.figure() here would open a second,
# empty figure instead.
plt.show()

In [None]:
# Compare the ECG distribution (distplot defaults) of the train and test splits.
plt.figure(figsize=(15, 10))
for split_label, split_frame in (("train ECG", train), ("test ECG", test)):
    sns.distplot(split_frame['ecg'], label=split_label)
plt.legend()
plt.xlabel("ECG")
# Use plt.show() to render this figure; plt.figure() here would open a second,
# empty figure instead.
plt.show()

In [None]:
eeg_features = ["eeg_fp1", "eeg_f7", "eeg_f8", "eeg_t4", "eeg_t6", "eeg_t5", "eeg_t3", "eeg_fp2", "eeg_o1", "eeg_p3", "eeg_pz", "eeg_f3", "eeg_fz", "eeg_f4", "eeg_c4", "eeg_p4", "eeg_poz", "eeg_c3", "eeg_cz", "eeg_o2"]

# Draw ONE seeded random subset per split and reuse it for every channel.
# This keeps the figure reproducible across runs, makes all panels describe
# the same rows, and avoids re-sampling the full frames on every iteration.
# The sample size is capped at the frame length so small inputs do not raise.
EEG_PLOT_SAMPLE_SIZE = 10000
EEG_PLOT_SEED = 0
train_eeg_sample = train.sample(n=min(EEG_PLOT_SAMPLE_SIZE, len(train)), random_state=EEG_PLOT_SEED)
test_eeg_sample = test.sample(n=min(EEG_PLOT_SAMPLE_SIZE, len(test)), random_state=EEG_PLOT_SEED)

# Size the subplot grid from the number of channels (20 channels -> 5 rows x 4
# columns) so no empty panels are left over.
eeg_grid_cols = 4
eeg_grid_rows = -(-len(eeg_features) // eeg_grid_cols)  # ceiling division

plt.figure(figsize=(20, 25))
for panel_index, channel in enumerate(eeg_features, start=1):
    plt.subplot(eeg_grid_rows, eeg_grid_cols, panel_index)
    sns.distplot(train_eeg_sample[channel], label="train " + channel, hist=False)
    sns.distplot(test_eeg_sample[channel], label="test " + channel, hist=False)
    # Clip the x-axis to a common window so the panels are directly comparable.
    plt.xlim((-500, 500))
    plt.legend()

plt.show()
    
    

# Data preparation

## Preparing EEG data
The EEG data is recorded with a fairly typical arrangement of 20 electrodes across the scalp. The letter in each lead name signifies the part of the brain that the lead
is nearest to (Temporal, Frontal, Parietal, etc.), with odd numbers on the left and even numbers on the right. Usually in the clinic, we don't look at the electrical
potential at each individual electrode, but at the potential difference between pairs of electrodes. This gives us an idea of the electrical field in the brain region
between those two points, as a way to infer what the brain is doing in that region.

In [None]:
# Electrode pairs whose potential difference becomes a new column on `train`.
# Each pair (first, second) yields train["<first>_<second>"] =
# train["eeg_<first>"] - train["eeg_<second>"]; list order fixes column order.
montage_pairs = [
    # odd-numbered electrodes
    ("fp1", "f7"), ("f7", "t3"), ("t3", "t5"), ("t5", "o1"),
    ("fp1", "f3"), ("f3", "c3"), ("c3", "p3"), ("p3", "o1"),
    # z-suffixed electrodes
    ("fz", "cz"), ("cz", "pz"), ("pz", "poz"),
    # even-numbered electrodes
    ("fp2", "f8"), ("f8", "t4"), ("t4", "t6"), ("t6", "o2"),
    ("fp2", "f4"), ("f4", "c4"), ("c4", "p4"), ("p4", "o2"),
]
for first, second in montage_pairs:
    train[first + "_" + second] = train["eeg_" + first] - train["eeg_" + second]

In [None]:
# Electrode pairs whose potential difference becomes a new column on `test`.
# Each pair (first, second) yields test["<first>_<second>"] =
# test["eeg_<first>"] - test["eeg_<second>"]; list order fixes column order.
montage_pairs = [
    # odd-numbered electrodes
    ("fp1", "f7"), ("f7", "t3"), ("t3", "t5"), ("t5", "o1"),
    ("fp1", "f3"), ("f3", "c3"), ("c3", "p3"), ("p3", "o1"),
    # z-suffixed electrodes
    ("fz", "cz"), ("cz", "pz"), ("pz", "poz"),
    # even-numbered electrodes
    ("fp2", "f8"), ("f8", "t4"), ("t4", "t6"), ("t6", "o2"),
    ("fp2", "f4"), ("f4", "c4"), ("c4", "p4"), ("p4", "o2"),
]
for first, second in montage_pairs:
    test[first + "_" + second] = test["eeg_" + first] - test["eeg_" + second]

In [None]:
# Derived electrode-difference columns, in the order they are created.
eeg_difference_columns = [
    'fp1_f7', 'f7_t3', 't3_t5', 't5_o1',
    'fp1_f3', 'f3_c3', 'c3_p3', 'p3_o1',
    'fz_cz', 'cz_pz', 'pz_poz',
    'fp2_f8', 'f8_t4', 't4_t6', 't6_o2',
    'fp2_f4', 'f4_c4', 'c4_p4', 'p4_o2',
]
# Remaining non-EEG signal columns taken as-is.
other_signal_columns = ["ecg", "r", "gsr"]

# Full ordered list of feature column names.
features_n = eeg_difference_columns + other_signal_columns

**Normalization**

In [None]:
# Gather the selected columns into plain dicts keyed by column name.
# Only the training dict carries the 'event' label, and it comes first.
d_train = {'event': train['event']}
d_train.update((column, train[column]) for column in features_n)
d_test = {column: test[column] for column in features_n}

In [None]:
# Build the training DataFrame from the collected columns ('event' plus every name in features_n).
train_df=pd.DataFrame(d_train)