# Prep Notes
- Always remember that you learn through iteration. 
- The more you interate, the more you learn.
- You are overwhelmed because the data is alien to you. 
- You need to run through it and always take notes to guide you back as bread crumbs to what is important.

In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style('whitegrid')
pd.options.display.max_rows = 200
pd.options.display.max_columns = 1000

In [7]:
df = pd.read_excel('../input/case-data/Case data.xlsx', )
flight = pd.read_excel('../input/case-data/Case data.xlsx', sheet_name='Flight Category Description')
delay = pd.read_excel('../input/case-data/Case data.xlsx', sheet_name='Delay Group code')

In [8]:
# df = df.join(flight.set_index('Flight Type Code'), on='flight_type')
# df = df.join(delay.set_index('Code'), on='prim_c')

In [9]:
df.dropna(axis=1, how='all', inplace=True)
df.drop('ttl_infants', axis=1, inplace=True)

df['ttl_dep_dly'] = [x.hour * 60 + x.minute for x in df['ttl_dep_dly']]
df['ttl_arr_dly'] = [x.hour * 60 + x.minute if x is not np.nan else np.nan for x in df['ttl_arr_dly']]

df['ttl_seats'] = df['ttl_seats'].groupby(df['sub_fleet']).transform('max') ## fillna missing ttl_seats by sub_fleet
df['ttl_seats'].fillna(0, inplace=True)

df['passenger'] = [1 if x == 'J' else 0 for x in df['flight_type']]
df['departed'] = np.where(df['atd_z_date_time'].isnull(), 0, 1)
df['otp'] = [1 if x <= 15 else 0 for x in df['ttl_dep_dly']]

In [47]:
df['control'] = np.where(df['Controllable'] == 'Y', 1, 0)
df['reason_category'] = df['Reason'].str.split('-').apply(lambda x:x[0] if x is not np.nan else 'na')

In [48]:
df['rpm'] = df['distance'] * df['ttl_pax_rev']
df['asm'] = df['distance'] * df['ttl_seats']
df['route'] = df['sched_dep_iata'] + '-' + df['sched_arr_iata']

df['flight_distance'] = pd.cut(df.query('passenger==1 & departed==1')['distance'],
                               5, labels=["0_ultrashort", "1_short", "2_medium", "3_long", "4_ultralong"])

In [49]:
target = 'otp'
num = [x for x in df.columns if df.dtypes[x] in ('float', 'int64')]
cat = [x for x in df.columns if df.dtypes[x] == 'object']
tim = [x for x in df.columns if df.dtypes[x] == 'datetime64[ns]']
num.remove(target)

In [50]:
ps = df.loc[
    (df['departed']==1)
    & (df['passenger'] == 1)
].copy()

# Explore

## Summary Notes
- Currently, OTP for passenger flights are at 71.4%.
- Out of all 213 flights that experienced delay, 29 are within our control.
- By focusing on these, we can improve OTP by 75.3%.

Notes:
- Data excludes flights that does not have departure data.
- Data only includes Passenger Flights.
- There are some passenger_flights that flew even though ttl_seats=0.
    - I have filled this value referencing the total expected seats of the sub_fleet.
- There are different kinds of delay codes given to several flights.
    - I am just focusing only on the delay code in prim_c.

## Correlation

In [84]:
corr = ps.corr()
corr[target].sort_values(ascending=False)

otp                    1.000000
ttl_fd_crew            0.058738
f_seats                0.050210
distance               0.043043
f_pax_rev              0.039457
block_difference       0.023641
asm                    0.021957
dep_fuel               0.016858
mails                 -0.004438
arrival_fuel          -0.007536
ib_connex_pax_total   -0.015416
ttl_ops_crew          -0.016274
ttl_cc_crew           -0.038227
ttl_pax_pad           -0.048552
cargo                 -0.071297
arr_fuel              -0.095304
j_seats               -0.095905
control               -0.105677
rpm                   -0.106442
ttl_seats             -0.113277
fuel_remaining        -0.115672
y_seats               -0.124736
lcl_term_pax_ttl      -0.137739
j_pax_rev             -0.155202
ob_connex_pax_total   -0.155487
bags_kgs              -0.215420
lcl_joining_pax_ttl   -0.229803
y_pax_rev             -0.233126
payload               -0.236534
ttl_pax_rev           -0.239723
ttl_arr_dly           -0.567718
ttl_dep_

## Missing total Seats

In [51]:
df.ttl_seats.isnull().sum()

0

In [52]:
df.groupby('sub_fleet')['ttl_seats'].max()

sub_fleet
A320-232      158.0
A321-231SL    196.0
B777-200F       9.0
B777-300      370.0
B787-10       336.0
B787-9        290.0
Name: ttl_seats, dtype: float64

In [53]:
df['ttl_seats'].groupby(df['sub_fleet']).transform('max')

0      336.0
1      336.0
2      290.0
3      290.0
4      336.0
       ...  
995    290.0
996    336.0
997    196.0
998    290.0
999    336.0
Name: ttl_seats, Length: 1000, dtype: float64

In [54]:
df.loc[df.ttl_seats.isnull() | (df.ttl_seats==0)][['sub_fleet', 'flight_category', 'ttl_seats']]

Unnamed: 0,sub_fleet,flight_category,ttl_seats
5,,,0.0
7,,,0.0
92,,,0.0
133,,,0.0
138,,,0.0
140,,,0.0
202,,,0.0
725,,,0.0
830,,,0.0


In [55]:
ps.groupby('sub_fleet')['ttl_seats'].max()

sub_fleet
A320-232      158.0
A321-231SL    196.0
B777-300      370.0
B787-10       336.0
B787-9        290.0
Name: ttl_seats, dtype: float64

In [56]:
df.query('ttl_seats==0 & flight_type=="J"')['ttl_seats']

Series([], Name: ttl_seats, dtype: float64)

In [57]:
df.iloc[485][['sub_fleet', 'ttl_seats']]

sub_fleet    B787-9
ttl_seats     290.0
Name: 485, dtype: object

In [58]:
df.iloc[843][['sub_fleet', 'ttl_seats']]

sub_fleet    B787-10
ttl_seats      336.0
Name: 843, dtype: object

In [59]:
df.iloc[843][['sub_fleet', 'ttl_seats']]

sub_fleet    B787-10
ttl_seats      336.0
Name: 843, dtype: object

## Flights in order

In [60]:
focus = df.loc[df['reg']=='A6-BLK'].copy()

In [61]:
cnum = [
    'block_difference',
 'distance',
 'ttl_dep_dly', 'ttl_arr_dly',
#  'f_pax_rev', 'f_seats',
#  'j_pax_rev', 'j_seats',
#  'y_pax_rev', 'y_seats',
 'ttl_pax_pad',
 'ttl_fd_crew', 'ttl_cc_crew', 'ttl_ops_crew',
 'ttl_pax_rev', 'ttl_seats',
#  'ib_connex_pax_total', 'ob_connex_pax_total', 'lcl_joining_pax_ttl', 'lcl_term_pax_ttl',
#  'arr_fuel', 'dep_fuel',
#  'cargo', 'mails', 'payload', 'bags_kgs',
#  'fuel_remaining', 'arrival_fuel',
#  'passenger'
]
focus[cnum]

Unnamed: 0,block_difference,distance,ttl_dep_dly,ttl_arr_dly,ttl_pax_pad,ttl_fd_crew,ttl_cc_crew,ttl_ops_crew,ttl_pax_rev,ttl_seats
63,-16.0,2345,11,0.0,2.0,2,10,12,64,290.0
82,-25.0,6319,0,0.0,2.0,4,12,16,100,290.0
170,-16.0,3015,0,0.0,0.0,2,10,12,53,290.0
187,-4.0,2793,0,0.0,1.0,2,10,12,61,290.0
339,12.0,6319,8,20.0,5.0,5,12,17,144,290.0
369,18.0,5952,4,22.0,0.0,4,12,16,138,290.0
415,-2.0,871,17,15.0,3.0,2,9,11,266,290.0
431,5.0,3548,5,10.0,1.0,2,10,12,243,290.0
485,2.0,1229,149,151.0,3.0,2,9,11,265,290.0
719,21.0,6508,19,40.0,1.0,4,12,16,37,290.0


In [62]:
col = [
    'reg'
#        , 'flight_number'
    , 'route'
#     , 'std_z_date_time'
       , 'atd_z_date_time' , 'ttl_dep_dly'
#     , 'sta_z_date_time' 
       , 'ata_z_date_time'
       , 'ttl_arr_dly'
#     , 'distance'
    , 'ttl_seats'
       , 'flight_type'
      ]
focus[col].sort_values(by='atd_z_date_time')

Unnamed: 0,reg,route,atd_z_date_time,ttl_dep_dly,ata_z_date_time,ttl_arr_dly,ttl_seats,flight_type
63,A6-BLK,AUH-FCO,2021-10-02 04:26:00,11,2021-10-02 10:25:00,0.0,290.0,J
82,A6-BLK,AUH-ORD,2021-10-03 04:59:00,0,2021-10-03 19:14:00,0.0,290.0,J
170,A6-BLK,KUL-AUH,2021-10-05 18:12:00,0,2021-10-06 00:56:00,0.0,290.0,J
187,A6-BLK,AUH-BCN,2021-10-06 03:56:00,0,2021-10-06 11:07:00,0.0,290.0,J
339,A6-BLK,ORD-AUH,2021-10-09 00:23:00,8,2021-10-09 13:55:00,20.0,290.0,J
369,A6-BLK,AUH-LAX,2021-10-10 05:39:00,4,2021-10-10 19:57:00,22.0,290.0,J
415,A6-BLK,AUH-JED,2021-10-12 07:52:00,17,2021-10-12 10:35:00,15.0,290.0,J
431,A6-BLK,CGK-AUH,2021-10-13 16:15:00,5,2021-10-14 00:05:00,10.0,290.0,J
485,A6-BLK,NCR-AUH,2021-10-17 02:19:00,149,2021-10-17 05:51:00,151.0,290.0,J
719,A6-BLK,SYD-AUH,2021-10-23 10:34:00,19,2021-10-24 01:25:00,40.0,290.0,J


## Delay Codes unstacked

In [63]:
dly_code = [x for x in cat if x.startswith('c') and not x.endswith('desc')]
dly_code.append('prim_c')
ch = df[dly_code].stack().reset_index()
ch.columns = ['index', 'val', 'col']
ch.drop_duplicates(subset=['col'], keep='first', inplace=True)
ch.head(10)
dly_ch = ch.pivot_table(index='index', columns='col', aggfunc='count', fill_value=0)
dly_ch

Unnamed: 0_level_0,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val,val
col,03A,04A,06B,09A,09B,10A,10C,12A,13A,15B,15D,16A,16B,16D,16E,16F,16G,16H,16I,16J,18A,18B,21A,21B,21C,22A,22B,22C,23B,24B,31B,32A,32B,33A,34A,35B,36A,36B,38A,39A,40A,41B,42B,46R,49A,51A,52A,55A,55B,55C,58A,61A,62A,62C,63A,63B,63C,65B,71A,72A,77A,81A,85A,85B,86A,86B,86U,87A,87B,87D,87E,89A,89B,91B,91R,92R,93R,96O,96R,96T,99A,XXX
index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


## On Time Performance

In [64]:
ps[target].agg(['mean', 'sum', 'count'])

mean       0.71371
sum      531.00000
count    744.00000
Name: otp, dtype: float64

In [65]:
ps.groupby([target, 'control'])[target].agg(['mean', 'sum', 'count'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,sum,count
otp,control,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,0.0,0,184
0,1,0.0,0,29
1,0,1.0,494,494
1,1,1.0,37,37


## Travel Passenger Distribution. Load Factors.

In [66]:
capacity = ps.groupby(['dep_region', 'sched_dep_iata', 'sched_arr_iata'])[['ttl_pax_rev', 'ttl_seats']].sum()

capacity = (capacity['ttl_pax_rev'] / capacity['ttl_seats']).reset_index()

In [67]:
capacity

Unnamed: 0,dep_region,sched_dep_iata,sched_arr_iata,0
0,Africa,CAI,AUH,0.801016
1,Africa,CMN,AUH,0.486207
2,Africa,NBO,AUH,0.331034
3,Africa,SEZ,AUH,0.485759
4,Americas,IAD,AUH,0.482759
5,Americas,LAX,AUH,0.647783
6,Americas,ORD,AUH,0.596552
7,Americas,YYZ,AUH,0.881439
8,Asia,AMD,AUH,0.781034
9,Asia,AMM,AUH,0.669922


In [68]:
capacity.columns = ['dep_region', 'sched_dep_iata', 'sched_arr_iata', 'load_factor']

In [69]:
ps[['flight_number', 'reg']].value_counts()

flight_number  reg   
OP0130         A6-BLD    5
OP0018         A6-BLB    4
OP0008         A6-BMB    4
OP0273         A6-AEC    3
OP2333         A6-AEI    3
                        ..
OP0221         A6-AEC    1
               A6-AEH    1
               A6-AEJ    1
OP0222         A6-AEC    1
OP2552         A6-BNA    1
Length: 631, dtype: int64

In [70]:
ps.query("ttl_seats=='BOM'")

Unnamed: 0,schd_dep_date,flight_number,reg,fleet_family,sub_fleet,sched_dep_iata,dep_country,dep_region,sched_arr_iata,arr_region,std_z,std_z_date,std_z_date_time,sta_z_date_time,etd_z_date_time,eta_z_date_time,atd_z,atd_z_date,atd_z_date_time,ata_z_date_time,sched_block,actual_block,block_difference,std_z_weekend,std_z_monthyear,std_z_year,sta_z_weekend,distance,flight_type,flight_category,prim_dly,prim_c,prim_c_desc,dly1,c1_1,c1_desc,dly2,c2_1,c2_desc,dly3,c3_1,c3_desc,dly4,c4_1,c4_desc,remarks,ttl_dep_dly,ttl_arr_dly,f_pax_rev,f_seats,j_pax_rev,j_seats,y_pax_rev,y_seats,ttl_pax_pad,ttl_fd_crew,ttl_cc_crew,ttl_ops_crew,ttl_pax_rev,ttl_seats,ib_connex_pax_total,ob_connex_pax_total,lcl_joining_pax_ttl,lcl_term_pax_ttl,arr_fuel,dep_fuel,cargo,mails,payload,bags_kgs,fuel_remaining,arrival_fuel,Reason,OTP Working Group,Controllable,passenger,departed,otp,control,reason_category,rpm,asm,route,flight_distance


In [71]:
ps.query("sched_dep_iata=='BOM' & reg=='A6-BME'")[['otp', 'flight_number', 'reg', 'sched_dep_iata', 'sched_arr_iata', 'distance' , 'ttl_pax_rev', 'ttl_seats', 'rpm', 'asm']]

Unnamed: 0,otp,flight_number,reg,sched_dep_iata,sched_arr_iata,distance,ttl_pax_rev,ttl_seats,rpm,asm
734,0,OP0205,A6-BME,BOM,AUH,1063,302,336.0,321026,357168.0
843,1,OP0205,A6-BME,BOM,AUH,1063,292,336.0,310396,357168.0


In [72]:
ps.query('ttl_seats ==0')

Unnamed: 0,schd_dep_date,flight_number,reg,fleet_family,sub_fleet,sched_dep_iata,dep_country,dep_region,sched_arr_iata,arr_region,std_z,std_z_date,std_z_date_time,sta_z_date_time,etd_z_date_time,eta_z_date_time,atd_z,atd_z_date,atd_z_date_time,ata_z_date_time,sched_block,actual_block,block_difference,std_z_weekend,std_z_monthyear,std_z_year,sta_z_weekend,distance,flight_type,flight_category,prim_dly,prim_c,prim_c_desc,dly1,c1_1,c1_desc,dly2,c2_1,c2_desc,dly3,c3_1,c3_desc,dly4,c4_1,c4_desc,remarks,ttl_dep_dly,ttl_arr_dly,f_pax_rev,f_seats,j_pax_rev,j_seats,y_pax_rev,y_seats,ttl_pax_pad,ttl_fd_crew,ttl_cc_crew,ttl_ops_crew,ttl_pax_rev,ttl_seats,ib_connex_pax_total,ob_connex_pax_total,lcl_joining_pax_ttl,lcl_term_pax_ttl,arr_fuel,dep_fuel,cargo,mails,payload,bags_kgs,fuel_remaining,arrival_fuel,Reason,OTP Working Group,Controllable,passenger,departed,otp,control,reason_category,rpm,asm,route,flight_distance


In [73]:
(capacity.groupby(['dep_region','sched_dep_iata', 'sched_arr_iata'])['load_factor'].max()
 .sort_values(ascending=False))

dep_region  sched_dep_iata  sched_arr_iata
Asia        AUH             JED               0.941349
                            HYD               0.922089
            LHE             AUH               0.917918
            AUH             MAA               0.914266
                            NCR               0.913793
                            ISB               0.909959
                            DMM               0.906868
            KHI             AUH               0.896325
            TRV             AUH               0.893878
            ISB             AUH               0.884088
            AUH             BOM               0.881497
Americas    YYZ             AUH               0.881439
Asia        AUH             YYZ               0.880392
            MAA             AUH               0.880134
            BOM             AUH               0.875221
            AUH             TRV               0.860544
            HYD             AUH               0.858757
            DAC       

In [74]:
df[['sched_dep_iata', 'sched_arr_iata', 'distance' , 'ttl_pax_rev', 'ttl_seats', 'rpm', 'asm']]

Unnamed: 0,sched_dep_iata,sched_arr_iata,distance,ttl_pax_rev,ttl_seats,rpm,asm
0,CAI,AUH,1283,314,336.0,402862,431088.0
1,MXP,AUH,2544,69,336.0,175536,854784.0
2,AUH,KUL,3015,48,290.0,144720,874350.0
3,BRU,AUH,2788,43,290.0,119884,808520.0
4,MNL,AUH,3769,290,336.0,1093010,1266384.0
...,...,...,...,...,...,...,...
995,AUH,ORD,6319,171,290.0,1080549,1832510.0
996,LHR,AUH,2977,170,336.0,506090,1000272.0
997,AUH,COK,1509,185,196.0,279165,295764.0
998,MUC,AUH,2471,274,290.0,677054,716590.0


In [75]:
df[tim].min()

schd_dep_date     2021-10-01 00:00:00
std_z_date        2021-10-01 00:00:00
std_z_date_time   2021-10-01 00:10:00
sta_z_date_time   2021-10-01 01:20:00
etd_z_date_time   2021-10-01 00:10:00
eta_z_date_time   2021-10-01 01:12:00
atd_z_date        2021-10-01 00:00:00
atd_z_date_time   2021-10-01 00:05:00
ata_z_date_time   2021-10-01 01:13:00
std_z_weekend     2021-10-02 00:00:00
std_z_monthyear   2021-10-01 00:00:00
sta_z_weekend     2021-10-02 00:00:00
dtype: datetime64[ns]

In [76]:
df.head()

Unnamed: 0,schd_dep_date,flight_number,reg,fleet_family,sub_fleet,sched_dep_iata,dep_country,dep_region,sched_arr_iata,arr_region,std_z,std_z_date,std_z_date_time,sta_z_date_time,etd_z_date_time,eta_z_date_time,atd_z,atd_z_date,atd_z_date_time,ata_z_date_time,sched_block,actual_block,block_difference,std_z_weekend,std_z_monthyear,std_z_year,sta_z_weekend,distance,flight_type,flight_category,prim_dly,prim_c,prim_c_desc,dly1,c1_1,c1_desc,dly2,c2_1,c2_desc,dly3,c3_1,c3_desc,dly4,c4_1,c4_desc,remarks,ttl_dep_dly,ttl_arr_dly,f_pax_rev,f_seats,j_pax_rev,j_seats,y_pax_rev,y_seats,ttl_pax_pad,ttl_fd_crew,ttl_cc_crew,ttl_ops_crew,ttl_pax_rev,ttl_seats,ib_connex_pax_total,ob_connex_pax_total,lcl_joining_pax_ttl,lcl_term_pax_ttl,arr_fuel,dep_fuel,cargo,mails,payload,bags_kgs,fuel_remaining,arrival_fuel,Reason,OTP Working Group,Controllable,passenger,departed,otp,control,reason_category,rpm,asm,route,flight_distance
0,2021-10-01,OP0654,A6-BMF,B787,B787-10,CAI,Egypt,Africa,AUH,Asia,10:35:00,2021-10-01,2021-10-01 10:35:00,2021-10-01 13:55:00,2021-10-01 11:00:00,2021-10-01 14:12:00,11:02:00,2021-10-01,2021-10-01 11:02:00,2021-10-01 14:11:00,03:20:00,03:09:00,-11.0,2021-10-02,2021-10-01,2021,2021-10-02,1283,J,,00:19:00,32B,Late loaders; Lack of loading staff; Incorrect...,00:19:00,32B,Late loaders; Lack of loading staff; Incorrect...,00:08:00,93R,Applicable to all delays caused by late arriva...,,,,,,,,27,16.0,0,0,32,32,282,304,2.0,2,10,12,314,336.0,1,29,313,285,5500,20500,0,0,34138,9105,6300.0,5500.0,Aircraft & Ramp - Errors or late/lack of loadi...,Below Wing,Y,1,1,0,1,Aircraft & Ramp,402862,431088.0,CAI-AUH,0_ultrashort
1,2021-10-01,OP0088,A6-BMD,B787,B787-10,MXP,Italy,Europe,AUH,Asia,12:50:00,2021-10-01,2021-10-01 12:50:00,2021-10-01 18:45:00,2021-10-01 13:15:00,2021-10-01 19:10:00,13:17:00,2021-10-01,2021-10-01 13:17:00,2021-10-01 19:10:00,05:55:00,05:53:00,-2.0,2021-10-02,2021-10-01,2021,2021-10-02,2544,J,,00:14:00,93R,Applicable to all delays caused by late arriva...,00:14:00,93R,Applicable to all delays caused by late arriva...,00:13:00,86B,Extraordinary checks of guests/aircraft by CIH...,,,,,,,,27,25.0,0,0,28,32,41,304,1.0,2,10,12,69,336.0,0,38,69,31,5800,35100,15391,841,25070,1689,5300.0,5800.0,Reactionary - Late aircraft rotation due Other,Others,N,1,1,0,0,Reactionary,175536,854784.0,MXP-AUH,1_short
2,2021-10-01,OP0418,A6-BLX,B787,B787-9,AUH,United Arab Emirates,Asia,KUL,Asia,21:25:00,2021-10-01,2021-10-01 21:25:00,2021-10-02 04:35:00,2021-10-01 21:25:00,2021-10-02 04:38:00,21:27:00,2021-10-01,2021-10-01 21:27:00,2021-10-02 04:36:00,07:10:00,07:09:00,-1.0,2021-10-02,2021-10-01,2021,2021-10-02,3015,J,,00:02:00,03A,Flight departure within 3 minutes of STD irres...,00:02:00,03A,Flight departure within 3 minutes of STD irres...,,,,,,,,,,,2,1.0,0,0,6,28,42,262,0.0,2,10,12,48,290.0,39,9,9,39,7100,40500,0,0,0,0,5200.0,7100.0,Other - Flight departure within 3 minutes of STD,Others,N,1,1,1,0,Other,144720,874350.0,AUH-KUL,2_medium
3,2021-10-01,OP0058,A6-BNC,B787,B787-9,BRU,Belgium,Europe,AUH,Asia,13:00:00,2021-10-01,2021-10-01 13:00:00,2021-10-01 19:25:00,2021-10-01 13:00:00,2021-10-01 19:25:00,13:03:00,2021-10-01,2021-10-01 13:03:00,2021-10-01 19:26:00,06:25:00,06:23:00,-2.0,2021-10-02,2021-10-01,2021,2021-10-02,2788,J,,00:03:00,03A,Flight departure within 3 minutes of STD irres...,00:03:00,03A,Flight departure within 3 minutes of STD irres...,,,,,,,,,,,3,1.0,0,0,14,28,29,262,4.0,2,10,12,43,290.0,12,16,31,27,5500,33500,8368,1740,16108,886,5400.0,5500.0,Other - Flight departure within 3 minutes of STD,Others,N,1,1,1,0,Other,119884,808520.0,BRU-AUH,2_medium
4,2021-10-01,OP2423,A6-BMC,B787,B787-10,MNL,Philippines,Asia,AUH,Asia,15:45:00,2021-10-01,2021-10-01 15:45:00,2021-10-02 00:35:00,2021-10-01 15:45:00,2021-10-02 00:13:00,15:53:00,2021-10-01,2021-10-01 15:53:00,2021-10-02 00:12:00,08:50:00,08:19:00,-31.0,2021-10-02,2021-10-01,2021,2021-10-02,3769,J,,00:08:00,86B,Extraordinary checks of guests/aircraft by CIH...,00:08:00,86B,Extraordinary checks of guests/aircraft by CIH...,,,,,,,,,,,8,0.0,0,0,9,32,281,304,0.0,3,11,14,290,336.0,0,208,290,82,9900,59700,16082,0,46832,6771,8400.0,9900.0,Airport/Govt Authorities - CIH authorities,AptAuth,N,1,1,1,0,Airport/Govt Authorities,1093010,1266384.0,MNL-AUH,2_medium


In [77]:
(df.query('flight_type=="J"')
 .groupby('route')['ttl_pax_rev']
 .agg(['count', 'mean', 'sum'])
 .sort_values(by='sum', ascending=False))

Unnamed: 0_level_0,count,mean,sum
route,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LHR-AUH,28,185.821429,5203
CAI-AUH,19,207.421053,3941
AUH-RUH,13,208.846154,2715
AUH-JED,12,214.0,2568
FRA-AUH,13,196.846154,2559
AUH-CAI,19,124.736842,2370
YYZ-AUH,8,290.875,2327
AUH-LHR,15,154.8,2322
DAC-AUH,8,274.25,2194
AUH-FRA,13,168.615385,2192


In [78]:
sum(ps['ttl_pax_rev']) / sum(ps['ttl_seats'])

0.5284462611110567

## Distance of Flights

In [79]:
df.query('passenger==1 & departed==1')['distance'].describe()

count     744.000000
mean     2401.345430
std      1666.704526
min       175.000000
25%      1149.000000
50%      2000.000000
75%      3015.000000
max      6508.000000
Name: distance, dtype: float64

In [80]:
ps.loc[~ps['flight_distance'].isnull()].groupby('flight_distance')['flight_type'].count()

flight_distance
0_ultrashort    247
1_short         221
2_medium        189
3_long            6
4_ultralong      81
Name: flight_type, dtype: int64

# Submit

In [81]:
df.to_csv('case_submission.csv', index=False)

In [82]:
cumsum = ps.groupby(['schd_dep_date'])[['otp', 'passenger']].sum().cumsum()
cumsum.to_csv('cumsum.csv')

In [83]:
cumsum

Unnamed: 0_level_0,otp,passenger
schd_dep_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-10-01,15,27
2021-10-02,28,53
2021-10-03,44,74
2021-10-04,63,98
2021-10-05,87,130
2021-10-06,96,140
2021-10-07,144,204
2021-10-08,159,227
2021-10-09,179,255
2021-10-10,198,280
