# Correlation analysis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

# Use a small square canvas as the default size for every figure.
plt.rcParams['figure.figsize'] = [5, 5]

# Main table, read from data.csv.
df = pd.read_csv('data.csv')

# Single-letter aircraft class codes and their readable names.
aircraft_types_key = dict(
    A='Airplane',
    B='Helicopter',
    C='Glider',
    D='Balloon',
    F='Dirigible',
    I='Gyroplane',
    J='Ultralight',
    Y='Other',
    Z='Unknown',
)

# Keep only the rows whose INCIDENT_YEAR is 2012 or later.
df = df.loc[df['INCIDENT_YEAR'] >= 2012]

# Report how many rows survive the year filter.
print(df.shape[0])

# Auxiliary tables loaded alongside the main data.
col_mapping = pd.read_csv('column_mapping.csv')
airports_info = pd.read_csv('airports.csv')

# Trim surrounding whitespace from the aircraft class codes.
df['AC_CLASS'] = df.AC_CLASS.str.strip()

# Readable labels for the numeric AC_MASS codes.
mass_map = {
    1.0: "2,250 kg or less",
    2.0: "2,251 - 5,700 kg",
    3.0: "5,701 - 27,000 kg",
    4.0: "27,001 - 272,000 kg",
    5.0: "above 272,000 kg",
}

# Readable labels for the numeric engine-position codes (ENG_n_POS columns).
engine_position_map = {
    1.0: "Engine mounted below the wing",
    2.0: "Engine mounted above the wing",
    3.0: "Engine is an integral part of the wing root",
    4.0: "Engine is nacelle-mounted on the wing (i.e. piston or turboprop)",
    5.0: "Engine is mounted on the aft fuselage",
    6.0: "Engine is in the empennage (helicopters)",
    7.0: "Engine mounted at the intake of the nose",
}

# Readable labels for the single-letter TYPE_ENG codes.
type_eng_map = dict(
    A="Reciprocating engine (piston)",
    B="Turbojet",
    C="Turboprop",
    D="Turbofan",
    E="None (glider)",
    F="Turboshaft (helicopter)",
    Y="Other",
)

# Replace the engine-type codes with their labels; codes missing from the
# map become NaN (standard Series.map behaviour).
df['TYPE_ENG'] = df['TYPE_ENG'].map(type_eng_map)

# Readable labels for the single-letter AC_CLASS codes.
class_map = dict(
    A='Airplane',
    B='Helicopter',
    C='Glider',
    D='Balloon',
    F='Dirigible',
    I='Gyroplane',
    J='Ultralight',
    Y='Other',
    Z='Unknown',
)

# Replace the aircraft-class codes with their labels.
df['AC_CLASS'] = df['AC_CLASS'].map(class_map)

# Columns removed from the frame at the end of this section. The
# commented-out names are kept for reference and are currently NOT dropped.
irrelevant_cols = [
    'INDEX_NR',
    # 'INCIDENT_DATE',
    # 'OPID', # operator id
    # 'REG',
    # 'AMA',
    # 'AMO',
    # 'EMA', # engine make
    # 'EMO', # engine model
    # 'COST_REPAIRS_INFL_ADJ',
    # 'COST_OTHER_INFL_ADJ',
    # 'EFFECT_OTHER',
    # 'OTHER_SPECIFY',
    # 'EFFECT_OTHER',
    # 'BIRD_BAND_NUMBER',
    # 'REMARKS',
    # 'REMAINS_COLLECTED',
    # 'REMAINS_SENT',
    # 'COMMENTS',
    # 'REPORTED_NAME',
    # 'REPORTED_TITLE',
    # 'SOURCE',
    # 'PERSON',
    # 'LUPDATE',
    # 'TRANSFER',
    # 'RUNWAY',
]

# Disabled alternative: one indicator column per season.
# df['IS_SPRING'] = df['INCIDENT_MONTH'].apply(lambda x: 1 if x in [3, 4, 5] else 0)
# df['IS_SUMMER'] = df['INCIDENT_MONTH'].apply(lambda x: 1 if x in [6, 7, 8] else 0)
# df['IS_FALL'] = df['INCIDENT_MONTH'].apply(lambda x: 1 if x in [9, 10, 11] else 0)
# df['IS_WINTER'] = df['INCIDENT_MONTH'].apply(lambda x: 1 if x in [12, 1, 2] else 0)

# Season label for months 3-11. Every other value falls back to 'Winter'.
# NOTE(review): the fallback also applies to missing or unexpected month
# values, not only to months 12, 1 and 2.
season_by_month = {
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}
df['TIME_OF_YEAR'] = df['INCIDENT_MONTH'].apply(
    lambda month: season_by_month.get(month, 'Winter')
)

# Replace the numeric mass codes with their labels.
df['AC_MASS'] = df['AC_MASS'].map(mass_map)

# Replace the numeric position code of each of the four engines with its label.
for engine_pos_col in ('ENG_1_POS', 'ENG_2_POS', 'ENG_3_POS', 'ENG_4_POS'):
    df[engine_pos_col] = df[engine_pos_col].map(engine_position_map)

# Discard the columns listed in irrelevant_cols.
df = df.drop(columns=irrelevant_cols)


# Readable label for every per-part indicator column.  Key order matters: it
# determines the order of the labels inside the summary strings built below.
all_map = {
    'STR_RAD': 'Struck Radome',
    'DAM_RAD': 'Damaged Radome',
    'STR_WINDSHLD': 'Struck Windshield',
    'DAM_WINDSHLD': 'Damaged Windshield',
    'STR_NOSE': 'Struck Nose',
    'DAM_NOSE': 'Damaged Nose',
    'STR_ENG1': 'Struck Engine 1',
    'DAM_ENG1': 'Damaged Engine 1',
    'ING_ENG1': 'Ingested Engine 1',
    'STR_ENG2': 'Struck Engine 2',
    'DAM_ENG2': 'Damaged Engine 2',
    'ING_ENG2': 'Ingested Engine 2',
    'STR_ENG3': 'Struck Engine 3',
    'DAM_ENG3': 'Damaged Engine 3',
    'ING_ENG3': 'Ingested Engine 3',
    'STR_ENG4': 'Struck Engine 4',
    'DAM_ENG4': 'Damaged Engine 4',
    'ING_ENG4': 'Ingested Engine 4',
    'INGESTED_OTHER': 'Ingested Other',
    'STR_PROP': 'Struck Propeller',
    'DAM_PROP': 'Damaged Propeller',
    'STR_WING_ROT': 'Struck Wing or Rotor',
    'DAM_WING_ROT': 'Damaged Wing or Rotor',
    'STR_FUSE': 'Struck Fuselage',
    'DAM_FUSE': 'Damaged Fuselage',
    'STR_LG': 'Struck Landing Gear',
    'DAM_LG': 'Damaged Landing Gear',
    'STR_TAIL': 'Struck Tail',
    'DAM_TAIL': 'Damaged Tail',
    'STR_LGHTS': 'Struck Lights',
    'DAM_LGHTS': 'Damaged Lights',
    'STR_OTHER': 'Struck Other',
    'DAM_OTHER': 'Damaged Other',
}


def _labels_with_prefix(prefix):
    """Return the entries of ``all_map`` whose column name starts with *prefix*.

    The relative key order of ``all_map`` is preserved.
    """
    return {
        column: label
        for column, label in all_map.items()
        if column.startswith(prefix)
    }


# Per-category views, derived from all_map so the label text lives in exactly
# one place and the subsets cannot drift out of sync with it.
dam_map = _labels_with_prefix('DAM_')
struck_map = _labels_with_prefix('STR_')
# Prefix 'ING' (no underscore) so that both ING_ENG* and INGESTED_OTHER match.
ingested_map = _labels_with_prefix('ING')


def _join_set_labels(row, labels):
    """Join the labels of all indicators that equal 1 in *row*.

    Args:
        row: Mapping-like object indexable by column name (for example a
            DataFrame row passed by ``df.apply(..., axis=1)``).
        labels: Dict of column name -> label; its order fixes the output order.

    Returns:
        The matching labels separated by ``', '``, or the string ``'None'``
        when no indicator equals 1.
    """
    hits = [label for column, label in labels.items() if row[column] == 1]
    return ', '.join(hits) if hits else 'None'


def struck_str(row):
    """Return the comma-separated 'Struck ...' labels set in *row*, or 'None'."""
    return _join_set_labels(row, struck_map)


def dam_str(row):
    """Return the comma-separated 'Damaged ...' labels set in *row*, or 'None'."""
    return _join_set_labels(row, dam_map)


def ingested_str(row):
    """Return the comma-separated 'Ingested ...' labels set in *row*, or 'None'."""
    return _join_set_labels(row, ingested_map)


def all_str(row):
    """Return every label set in *row*, in ``all_map`` order, or 'None'."""
    return _join_set_labels(row, all_map)

# Collapse the per-part indicator columns into one readable string per row,
# one summary column per category.
for result_col, summarise_row in (
    ('ALL_RESULT', all_str),
    ('STRUCK_RESULT', struck_str),
    ('DAM_RESULT', dam_str),
    ('INGESTED_RESULT', ingested_str),
):
    df[result_col] = df.apply(summarise_row, axis=1)

# Readable labels for the DAMAGE_LEVEL codes.
damage_map = {
    'N': 'None',
    'M': 'Minor',
    'M?': 'Uncertain Level',
    'S': 'Substantial',
    'D': 'Destroyed',
}

df['DAMAGE'] = df.DAMAGE_LEVEL.map(damage_map)

# The raw indicator columns and the coded damage level are now redundant with
# the summary columns created above, so remove them.
df.drop(columns=list(all_map.keys()) + ['DAMAGE_LEVEL'], inplace=True)

df.head()

df['DAMAGE'].value_counts()

# Drop the first character of every airport ID (intended to remove the
# leading 'K').
# NOTE(review): the slice is unconditional, so IDs that do not start with 'K'
# also lose their first character - confirm this is what downstream joins expect.
df['AIRPORT_ID'] = df['AIRPORT_ID'].str.slice(1)
