# Use Case Data Sets

In [2]:
# Common imports
import numpy as np # numpy is THE toolbox for scientific computing with python
import pandas as pd # pandas provides THE data structure and data analysis tools for data scientists 
import pickle # we have to import a pickle file
import os 
from datetime import timedelta, date

# Let pandas print up to 101 rows and 101 columns before truncating the display
pd.options.display.max_rows = 101
pd.options.display.max_columns = 101

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Font sizes for figures: axis labels at 14pt, x/y tick labels at 12pt
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Airline Case Study

In [14]:
# Load the flight and ground tables of use case 3; both files live in the same
# directory, so the path prefix is shared and only the file name varies.
df_flight, df_ground = (
    pd.read_csv('../../' + '06_case_studies/data/data3/use_case_3/' + csv_name)
    for csv_name in ('flight_information.csv', 'ground_information.csv')
)

In [15]:
# Show the column labels of the flight table
df_flight.columns

Index(['Unnamed: 0', 'leg_no', 'fn_carrier', 'fn_number', 'dep_ap_sched',
       'arr_ap_sched', 'dep_sched_date', 'dep_sched_time', 'arr_sched_date',
       'arr_sched_time', 'm_offblockdt', 'm_onblockdt', 'ac_registration',
       'change_reason_code', 'dep_delay', 'Ac Type Code', 'trans_time',
       'sched_trans_time', 'Crew Group', 'TLC_trans', 'crew_type_change',
       'Sched Groundtime', 'Act Groundtime'],
      dtype='object')

In [18]:
# Remove the saved-index artefact ('Unnamed: 0') and the two crew columns
# 'TLC_trans' and 'crew_type_change' from the flight table.
# errors='ignore' keeps this cell re-runnable: a later cell writes the trimmed
# table back over the source CSV, so on the next run these columns are already
# gone and a strict drop would raise KeyError.
df_flight = df_flight.drop(
    columns=['Unnamed: 0', 'TLC_trans', 'crew_type_change'],
    errors='ignore',
)

In [21]:
# Write the trimmed flight table back to the CSV it was loaded from (this
# overwrites the original file; index=False avoids adding an 'Unnamed: 0' column).
# Uses the same notebook-relative location as the read cell instead of an absolute
# path that only exists on one user's machine.
# NOTE(review): assumes the working directory is two levels below the project
# root, as the '../../' prefix of the read cell implies — confirm.
df_flight.to_csv('../../' + '06_case_studies/data/data3/use_case_3/' + 'flight_information.csv', index=False)

In [22]:
# Remove the saved-index artefact ('Unnamed: 0') from the ground table.
# errors='ignore' keeps this cell re-runnable: a later cell writes the table back
# over the source CSV without that column, so a strict drop would raise KeyError
# the next time the notebook is run.
df_ground = df_ground.drop(columns='Unnamed: 0', errors='ignore')

In [23]:
# Show the column labels of the ground table
df_ground.columns

Index(['day_of_origin', 'ac_type', 'fn_number', 'ac_registration', 'mingt',
       'dep_leg_inbound', 'arr_leg_inbound', 'arr_leg_outbound',
       'sched_inbound_dep', 'sched_inbound_arr', 'sched_outbound_dep',
       'sched_outbound_arr', 'sched_turnaround', 'leg_inbound', 'leg_outbound',
       'catering_duration', 'cleaning_duration', 'pax_boarding_duration'],
      dtype='object')

In [24]:
# Write the ground table back to the CSV it was loaded from (this overwrites the
# original file; index=False avoids adding an 'Unnamed: 0' column).
# Uses the same notebook-relative location as the read cell instead of an absolute
# path that only exists on one user's machine.
# NOTE(review): assumes the working directory is two levels below the project
# root, as the '../../' prefix of the read cell implies — confirm.
df_ground.to_csv('../../' + '06_case_studies/data/data3/use_case_3/' + 'ground_information.csv', index=False)

In [17]:
# Count how often each distinct 'crew_type_change' value occurs in the flight table.
# NOTE(review): this cell carries execution count In [17], i.e. it was run before
# the In [18] cell that drops 'crew_type_change'. Executed top-to-bottom it raises
# AttributeError because the column no longer exists — move it above the drop or
# remove it.
df_flight.crew_type_change.value_counts()

[]                                                  9993
['cp']                                               265
['ca']                                               227
['cp', 'cp']                                         195
['ca', 'cp', 'ca']                                    74
['cp', 'ca', 'ca']                                    73
['ca', 'ca', 'ca', 'cp', 'cp']                        68
['cp', 'ca', 'ca', 'ca', 'cp']                        65
['cp', 'cp', 'ca', 'ca', 'ca']                        65
['ca', 'ca', 'cp']                                    62
['ca', 'cp', 'ca', 'ca', 'cp']                        53
['ca', 'ca']                                          51
['ca', 'cp', 'cp', 'ca', 'ca']                        47
['cp', 'ca', 'ca', 'ca']                              46
['ca', 'cp', 'ca', 'cp', 'ca']                        44
['ca', 'ca', 'cp', 'cp', 'ca']                        43
['cp', 'ca', 'ca', 'cp', 'ca']                        42
['ca', 'cp']                   

In [11]:
# Remove the saved-index artefact ('Unnamed: 0') from the flight table.
# An earlier cell in this notebook already drops the same column, so in document
# order a strict drop raises KeyError; errors='ignore' makes this a harmless no-op
# when the column is already gone.
df_flight = df_flight.drop(columns='Unnamed: 0', errors='ignore')


In [12]:
# Left-join the ground table onto the flight table: rows match when the flight
# number is equal and the flight's 'dep_sched_date' equals the ground table's
# 'day_of_origin'. Flights without a match keep NaN in the ground columns.
df = df_flight.merge(
    df_ground,
    how='left',
    left_on=['fn_number', 'dep_sched_date'],
    right_on=['fn_number', 'day_of_origin'],
)
# Preview the first three merged rows
display(df.head(3))

Unnamed: 0,leg_no,fn_carrier,fn_number,dep_ap_sched,arr_ap_sched,dep_sched_date,dep_sched_time,arr_sched_date,arr_sched_time,m_offblockdt,m_onblockdt,ac_registration_x,change_reason_code,dep_delay,Ac Type Code,trans_time,sched_trans_time,Crew Group,TLC_trans,crew_type_change,Sched Groundtime,Act Groundtime,day_of_origin,ac_type,ac_registration_y,mingt,dep_leg_inbound,arr_leg_inbound,arr_leg_outbound,sched_inbound_dep,sched_inbound_arr,sched_outbound_dep,sched_outbound_arr,sched_turnaround,leg_inbound,leg_outbound,catering_duration,cleaning_duration,pax_boarding_duration
0,272005011,East Carmen Airlines,EC3098,North Gregory,East Carmen,2019-06-01,00:40,2019-06-01,04:30,2019-06-01 00:43:00,2019-06-01 04:12:00,ECLBSX,other problem,3.0,320,0,0,Start,"['Jason Brown_nan_nan_nan_cp', 'Jacob Stein_na...",[],235.0,268.0,,,,,,,,,,,,,,,,,
1,272009188,East Carmen Airlines,EC3146,Port Bobby,East Carmen,2019-06-01,02:05,2019-06-01,04:00,2019-06-01 01:58:00,2019-06-01 03:40:00,ECLWHX,no reason,-7.0,E95,0,0,Start,"['Scott Baker_nan_nan_nan_cp', 'Ruth Hoffman_n...",[],130.0,189.0,,,,,,,,,,,,,,,,,
2,271984537,East Carmen Airlines,EC2840,Port Ryan,East Carmen,2019-06-01,02:20,2019-06-01,03:50,2019-06-01 02:24:00,2019-06-01 03:57:00,ECLGLX,other problem,4.0,DH4,0,0,Start,"['Cory Cooper_nan_nan_nan_ca', 'Morgan Foster_...",[],185.0,196.0,,,,,,,,,,,,,,,,,
