# Flightware Analysis
*Samarth Chitgopekar* (GitHub: [http-samc](https://github.com/http-samc))

## Step 1: Initialize & Import Dependencies, Declare Constants

In [33]:
import csv
from datetime import date, time, datetime
import numpy as np
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.io as pio
import plotly.figure_factory as ff

# Path to the CSV of flight records to analyse; point this at your own file.
DATA_SOURCE = "./data/nks519.csv"

# Set up plotly's offline notebook mode before any figure is drawn.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly docs.
init_notebook_mode(connected=True)

## Step 2: Clean CSV and Parse Into Dictionary

In [34]:
# Parsed flight records, keyed by the flight date rendered as an epoch-seconds string.
# Each value is a dict with:
#   'departure' - departure date/time as an epoch-seconds string
#   'duration'  - flight duration in minutes, or -1 when the duration cell could not be split into H:MM
DATA = {}

def get_month_number(name: str) -> int:
    """Translate a three-letter English month abbreviation into its month number.

    Args:
        name: Abbreviation capitalised exactly as 'Jan', 'Feb', ..., 'Dec'.

    Returns:
        The calendar month number, from 1 ('Jan') through 12 ('Dec').

    Raises:
        KeyError: If ``name`` is not one of the twelve known abbreviations
            (the lookup is case-sensitive).
    """
    abbreviations = (
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    )
    # Position in the tuple (counted from 1) is the month number.
    month_by_abbreviation = {
        abbreviation: number
        for number, abbreviation in enumerate(abbreviations, start=1)
    }
    return month_by_abbreviation[name]

# Read the CSV and fill DATA with one entry per flight date that is not in the future.
# newline="" is the mode the csv module documents for files handed to csv.reader.
with open(DATA_SOURCE, "r", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row (no-op on an empty file)

    for row in reader:
        if not row: continue  # blank line in the file

        # The column label is stripped from the cell before parsing
        # (presumably the scraped cells are prefixed with it — verify against the CSV).
        # Remaining text is parsed as DD-Mon-YYYY.
        _flight_date = row[0].replace('Date', '').split('-')
        flight_date = date(int(_flight_date[2]), get_month_number(_flight_date[1]), int(_flight_date[0]))

        # Rows dated after today are ignored.
        if flight_date > date.today(): continue

        # Departure is 'H:MMAM' / 'H:MMPM' once the label and the CST/CDT suffix are removed.
        _departure = row[4].replace('Departure', '').replace('\xa0CST', '').replace('\xa0CDT', '').split(':')
        hour = int(_departure[0])
        if 'PM' in _departure[1]:
            # 12:xxPM is the noon hour (12); 1PM..11PM become 13..23.
            # Adding 12 unconditionally would turn 12PM into hour 24, which time() rejects.
            hour = hour % 12 + 12
        elif 'AM' in _departure[1]:
            # 12:xxAM is the midnight hour (0), not 12.
            hour = hour % 12
        # No AM/PM marker: the hour is used unchanged.
        minute = int(_departure[1].replace('AM', '').replace('PM', ''))
        departure = datetime.combine(flight_date, time(hour, minute, 0))

        # Duration is 'H:MM'; anything that does not split into exactly two parts is marked -1.
        _duration = row[6].replace('Duration', '').replace('Scheduled', '').split(':')
        duration = int(_duration[0]) * 60 + int(_duration[1]) if len(_duration) == 2 else -1

        # Epoch seconds (local time) stored as strings. datetime.timestamp() is used instead of
        # strftime('%s'), which is not a documented directive and fails on some platforms.
        flight_midnight = datetime.combine(flight_date, time.min)
        DATA[str(int(flight_midnight.timestamp()))] = {
            'departure': str(int(departure.timestamp())),
            'duration': duration,
        }

## Step 3: Get Final Data for Plots

In [35]:
# Departure timestamps (epoch seconds) for flights whose duration is positive;
# any non-positive timestamp is dropped as well.
departure_data = [
    departure_ts
    for departure_ts in (
        int(record['departure'])
        for record in DATA.values()
        if record['duration'] > 0
    )
    if departure_ts > 0
]

# Flight durations in minutes, keeping only positive values
# (this removes the -1 marker used for durations that could not be parsed).
duration_data = [
    record['duration']
    for record in DATA.values()
    if record['duration'] > 0
]

## Step 4: Define Helper Functions for Stats

In [36]:
def get_data_statistics(data, ddof=0):
    """Compute summary statistics for a sequence of numbers.

    Args:
        data: Sized sequence of numeric values (e.g. a list of durations).
        ddof: Delta degrees of freedom passed to ``np.std``. The default 0
            gives the population standard deviation (the original behaviour);
            pass 1 for the sample standard deviation.

    Returns:
        dict with keys 'std', 'median', 'mean', 'min', 'max' (as computed by
        numpy) and 'n' (``len(data)``).

    Raises:
        ValueError: If ``data`` is empty, because numpy cannot take the
            minimum or maximum of an empty sequence.
    """
    return {
        'std': np.std(data, ddof=ddof),
        'median': np.median(data),
        'mean': np.mean(data),
        'min': np.min(data),
        'max': np.max(data),
        'n': len(data)
    }

def get_z_score(std, mean, val):
    """Return how many standard deviations ``val`` lies from ``mean``.

    Args:
        std: Standard deviation of the distribution.
        mean: Mean of the distribution.
        val: The observation to standardise.

    Returns:
        ``(val - mean) / std``; negative when ``val`` is below the mean.
    """
    deviation = val - mean
    return deviation / std

## Plot Flight Duration Over Time

In [37]:
# Chart of flight duration (minutes) against departure date.
layout = go.Layout(
    title="Flight Duration vs Flight Departure Date",
    xaxis=dict(title="Flight Departure Date"),
    yaxis=dict(title="Flight Duration (mins)"),
)

fig = go.Figure(layout=layout)

# Departure timestamps are reduced to calendar dates for the x axis.
# add_trace is the cell's last expression, so its return value is what the notebook displays.
fig.add_trace(go.Scatter(
    x=[date.fromtimestamp(timestamp) for timestamp in departure_data],
    y=duration_data,
))


## Plot Histogram

In [38]:
# Distribution plot of the flight durations, drawn as a single group labelled 'distplot'.
fig = ff.create_distplot([duration_data], ['distplot'])

# Give the figure a title and axis labels so it can be read on its own,
# matching the labelled duration-vs-date chart.
# NOTE(review): "Density" assumes create_distplot's default histogram normalisation — confirm.
fig.update_layout(
    title="Distribution of Flight Durations",
    xaxis={"title": "Flight Duration (mins)"},
    yaxis={"title": "Density"},
)
fig.show()

## Get Sample Statistics

In [39]:
# Summarise the filtered flight durations (minutes); the returned dict is shown as the cell's output.
get_data_statistics(duration_data)

{'std': 12.377061182740993,
 'median': 139.0,
 'mean': 140.65934065934067,
 'min': 117,
 'max': 200,
 'n': 91}