In [56]:
import os
import pandas as pd
import numpy as np

import networkx as nx

import seaborn as sns
import matplotlib.pyplot as plt

In [57]:
# Silence every warning category for the rest of the session so that library
# warnings do not clutter the cell outputs. Note that this also hides
# deprecation and dtype warnings raised by pandas.
import warnings
warnings.simplefilter('ignore')

In [58]:
# Folder (relative to the working directory) that holds the flight CSV files;
# the read_csv / to_csv calls below build their paths from PATH.
PATH = os.path.join('Data', 'Flights')

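# 2. Dataset
* Source: the U.S. DOT flight-delays dataset on Kaggle (this notebook reads `flights.csv`, `airports.csv` and `airlines.csv` from it): https://www.kaggle.com/datasets/usdot/flight-delays?select=flights.csv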

In [59]:
# Load the raw flight records. The airline / airport code columns are read
# explicitly as strings so they compare cleanly against the IATA code lists
# used for filtering, instead of leaving their dtype to pandas' chunked type
# inference (any DtypeWarning about mixed types would be invisible because
# warnings are globally ignored in this notebook).
flights = pd.read_csv(
    os.path.join(PATH, 'flights.csv'),
    dtype={'AIRLINE': str, 'ORIGIN_AIRPORT': str, 'DESTINATION_AIRPORT': str},
)

In [62]:
# Airport lookup table, plus the list of distinct IATA_CODE values it contains.
airport = pd.read_csv(filepath_or_buffer=PATH + '/airports.csv')
airports = airport['IATA_CODE'].unique().tolist()
print(airport.shape)

(322, 7)


In [63]:
# Airline lookup table, plus the list of distinct IATA_CODE values it contains.
airline = pd.read_csv(filepath_or_buffer=PATH + '/airlines.csv')
airlines = airline['IATA_CODE'].unique().tolist()
airline.shape

(14, 2)

In [66]:
# Preview the airport lookup table (pandas truncates the rendered rows).
airport

Unnamed: 0,IATA_CODE,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
0,ABE,Lehigh Valley International Airport,Allentown,PA,USA,40.65236,-75.44040
1,ABI,Abilene Regional Airport,Abilene,TX,USA,32.41132,-99.68190
2,ABQ,Albuquerque International Sunport,Albuquerque,NM,USA,35.04022,-106.60919
3,ABR,Aberdeen Regional Airport,Aberdeen,SD,USA,45.44906,-98.42183
4,ABY,Southwest Georgia Regional Airport,Albany,GA,USA,31.53552,-84.19447
...,...,...,...,...,...,...,...
317,WRG,Wrangell Airport,Wrangell,AK,USA,56.48433,-132.36982
318,WYS,Westerly State Airport,West Yellowstone,MT,USA,44.68840,-111.11764
319,XNA,Northwest Arkansas Regional Airport,Fayetteville/Springdale/Rogers,AR,USA,36.28187,-94.30681
320,YAK,Yakutat Airport,Yakutat,AK,USA,59.50336,-139.66023


In [64]:
# Shape (rows, columns) of the raw flights table, before any cleaning.
flights.shape

(5819079, 31)

## Clean Flights

In [8]:
# Keep only flights whose origin airport, destination airport and airline all
# appear in the reference lists built from airports.csv / airlines.csv.
flights = flights[
    flights['ORIGIN_AIRPORT'].isin(airports)
    & flights['DESTINATION_AIRPORT'].isin(airports)
    & flights['AIRLINE'].isin(airlines)
]

In [9]:
# Remove columns that are not wanted in the cleaned data set.
# drop() is used without inplace=True so the cell never mutates a filtered
# slice, and errors='ignore' keeps the cell re-runnable once the columns are
# already gone (the original in-place drop raised KeyError on a second run).
flights = flights.drop(
    columns=['FLIGHT_NUMBER', 'TAIL_NUMBER', 'CANCELLATION_REASON'],
    errors='ignore',
)

# Keep only rows where AIR_SYSTEM_DELAY is present.
# NOTE(review): in the run shown this shrinks the table from ~5.8M rows to
# ~1.0M rows, so every later statistic only describes flights that have a
# recorded delay breakdown — confirm this is intended.
flights = flights.dropna(subset=['AIR_SYSTEM_DELAY'])

flights

Unnamed: 0,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,...,SCHEDULED_ARRIVAL,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
27,2015,1,1,4,NK,MSP,FLL,115,127.0,12.0,...,542,607.0,25.0,0,0,25.0,0.0,0.0,0.0,0.0
30,2015,1,1,4,NK,PHX,ORD,125,237.0,72.0,...,549,632.0,43.0,0,0,43.0,0.0,0.0,0.0,0.0
35,2015,1,1,4,HA,LAS,HNL,145,145.0,0.0,...,555,610.0,15.0,0,0,0.0,0.0,15.0,0.0,0.0
50,2015,1,1,4,B6,BQN,MCO,307,304.0,-3.0,...,500,520.0,20.0,0,0,20.0,0.0,0.0,0.0,0.0
52,2015,1,1,4,B6,SJU,MCO,400,535.0,95.0,...,605,730.0,85.0,0,0,0.0,0.0,85.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5819023,2015,12,31,4,B6,MCO,BQN,2342,3.0,21.0,...,325,345.0,20.0,0,0,0.0,0.0,17.0,3.0,0.0
5819051,2015,12,31,4,AS,SEA,MSP,2355,11.0,16.0,...,507,535.0,28.0,0,0,12.0,0.0,16.0,0.0,0.0
5819057,2015,12,31,4,NK,FLL,BQN,2358,11.0,13.0,...,318,346.0,28.0,0,0,15.0,0.0,13.0,0.0,0.0
5819069,2015,12,31,4,B6,LAS,JFK,2359,238.0,159.0,...,741,1020.0,159.0,0,0,0.0,0.0,159.0,0.0,0.0


In [10]:
# Per-column count of missing values remaining after cleaning.
flights.isna().sum()

YEAR                   0
MONTH                  0
DAY                    0
DAY_OF_WEEK            0
AIRLINE                0
ORIGIN_AIRPORT         0
DESTINATION_AIRPORT    0
SCHEDULED_DEPARTURE    0
DEPARTURE_TIME         0
DEPARTURE_DELAY        0
TAXI_OUT               0
WHEELS_OFF             0
SCHEDULED_TIME         0
ELAPSED_TIME           0
AIR_TIME               0
DISTANCE               0
WHEELS_ON              0
TAXI_IN                0
SCHEDULED_ARRIVAL      0
ARRIVAL_TIME           0
ARRIVAL_DELAY          0
DIVERTED               0
CANCELLED              0
AIR_SYSTEM_DELAY       0
SECURITY_DELAY         0
AIRLINE_DELAY          0
LATE_AIRCRAFT_DELAY    0
WEATHER_DELAY          0
dtype: int64

In [11]:
# Persist the cleaned table (row index omitted) so later sections can reload it.
flights.to_csv(path_or_buf=PATH + '/flights_clean.csv', index=False)

# Read the cleaned CSV file

In [42]:
# Reload the cleaned flights from disk so the aggregations below start from
# the saved file rather than from in-memory state.
flights = pd.read_csv(filepath_or_buffer=PATH + '/flights_clean.csv')
flights

Unnamed: 0,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,...,SCHEDULED_ARRIVAL,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
0,2015,1,1,4,NK,MSP,FLL,115,127.0,12.0,...,542,607.0,25.0,0,0,25.0,0.0,0.0,0.0,0.0
1,2015,1,1,4,NK,PHX,ORD,125,237.0,72.0,...,549,632.0,43.0,0,0,43.0,0.0,0.0,0.0,0.0
2,2015,1,1,4,HA,LAS,HNL,145,145.0,0.0,...,555,610.0,15.0,0,0,0.0,0.0,15.0,0.0,0.0
3,2015,1,1,4,B6,BQN,MCO,307,304.0,-3.0,...,500,520.0,20.0,0,0,20.0,0.0,0.0,0.0,0.0
4,2015,1,1,4,B6,SJU,MCO,400,535.0,95.0,...,605,730.0,85.0,0,0,0.0,0.0,85.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1003355,2015,12,31,4,B6,MCO,BQN,2342,3.0,21.0,...,325,345.0,20.0,0,0,0.0,0.0,17.0,3.0,0.0
1003356,2015,12,31,4,AS,SEA,MSP,2355,11.0,16.0,...,507,535.0,28.0,0,0,12.0,0.0,16.0,0.0,0.0
1003357,2015,12,31,4,NK,FLL,BQN,2358,11.0,13.0,...,318,346.0,28.0,0,0,15.0,0.0,13.0,0.0,0.0
1003358,2015,12,31,4,B6,LAS,JFK,2359,238.0,159.0,...,741,1020.0,159.0,0,0,0.0,0.0,159.0,0.0,0.0


# Group by first

### W/ Airline

In [43]:
# One row per (origin, destination, airline): first() takes the first
# non-null value of every remaining column within each group.
flights_g = (
    flights
    .groupby(['ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'AIRLINE'])
    .first()
    .reset_index()
)
flights_g

Unnamed: 0,ORIGIN_AIRPORT,DESTINATION_AIRPORT,AIRLINE,YEAR,MONTH,DAY,DAY_OF_WEEK,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,...,SCHEDULED_ARRIVAL,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
0,ABE,ATL,DL,2015,1,6,2,635,634.0,-1.0,...,904,944.0,40.0,0,0,40.0,0.0,0.0,0.0,0.0
1,ABE,ATL,EV,2015,1,3,6,1600,1618.0,18.0,...,1820,1840.0,20.0,0,0,2.0,0.0,0.0,18.0,0.0
2,ABE,ATL,OO,2015,3,2,1,1600,1643.0,43.0,...,1808,1848.0,40.0,0,0,0.0,0.0,0.0,40.0,0.0
3,ABE,DTW,EV,2015,1,3,6,1715,1734.0,19.0,...,1904,1921.0,17.0,0,0,0.0,0.0,17.0,0.0,0.0
4,ABE,ORD,EV,2015,1,3,6,600,558.0,-2.0,...,720,750.0,30.0,0,0,30.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8114,XNA,ORD,OO,2015,1,6,2,1038,1139.0,61.0,...,1225,1328.0,63.0,0,0,2.0,0.0,0.0,61.0,0.0
8115,XNA,SFO,OO,2015,11,1,7,745,740.0,-5.0,...,947,1120.0,93.0,0,0,93.0,0.0,0.0,0.0,0.0
8116,YAK,CDV,AS,2015,1,2,5,1151,1306.0,75.0,...,1246,1350.0,64.0,0,0,0.0,0.0,0.0,64.0,0.0
8117,YAK,JNU,AS,2015,1,2,5,1830,1851.0,21.0,...,1919,1940.0,21.0,0,0,0.0,0.0,0.0,21.0,0.0


In [44]:
# Save the per-(route, airline) "first" table without the row index.
# The file name is reproduced exactly (including the 'gruoped' spelling)
# because other code may load it by this name.
flights_g.to_csv(
    path_or_buf=PATH + '/flights_clean_gruoped_first_with_airline.csv',
    index=False,
)

### No Airline

In [45]:
# One row per (origin, destination): first() takes the first non-null value
# of every remaining column (including AIRLINE) within each route.
flights_g = (
    flights
    .groupby(['ORIGIN_AIRPORT', 'DESTINATION_AIRPORT'])
    .first()
    .reset_index()
)
flights_g

Unnamed: 0,ORIGIN_AIRPORT,DESTINATION_AIRPORT,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,...,SCHEDULED_ARRIVAL,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
0,ABE,ATL,2015,1,3,6,EV,1600,1618.0,18.0,...,1820,1840.0,20.0,0,0,2.0,0.0,0.0,18.0,0.0
1,ABE,DTW,2015,1,3,6,EV,1715,1734.0,19.0,...,1904,1921.0,17.0,0,0,0.0,0.0,17.0,0.0,0.0
2,ABE,ORD,2015,1,3,6,EV,600,558.0,-2.0,...,720,750.0,30.0,0,0,30.0,0.0,0.0,0.0,0.0
3,ABI,DFW,2015,1,2,5,MQ,1155,1303.0,68.0,...,1245,1354.0,69.0,0,0,1.0,0.0,0.0,65.0,3.0
4,ABQ,ATL,2015,1,3,6,DL,620,650.0,30.0,...,1119,1152.0,33.0,0,0,3.0,0.0,0.0,0.0,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4572,XNA,ORD,2015,1,1,4,EV,1451,1510.0,19.0,...,1641,1712.0,31.0,0,0,12.0,0.0,3.0,16.0,0.0
4573,XNA,SFO,2015,11,1,7,OO,745,740.0,-5.0,...,947,1120.0,93.0,0,0,93.0,0.0,0.0,0.0,0.0
4574,YAK,CDV,2015,1,2,5,AS,1151,1306.0,75.0,...,1246,1350.0,64.0,0,0,0.0,0.0,0.0,64.0,0.0
4575,YAK,JNU,2015,1,2,5,AS,1830,1851.0,21.0,...,1919,1940.0,21.0,0,0,0.0,0.0,0.0,21.0,0.0


In [46]:
# Save the per-route "first" table without the row index.
# The file name is reproduced exactly (including the 'gruoped' spelling)
# because other code may load it by this name.
flights_g.to_csv(
    path_or_buf=PATH + '/flights_clean_gruoped_first.csv',
    index=False,
)

# Group by Mean

### W/ Airline

In [47]:
# Average every numeric column per (origin, destination, airline) and record
# how many flights each group contains.
# NOTE(review): columns such as SCHEDULED_DEPARTURE / DEPARTURE_TIME look like
# HHMM-encoded clock times (e.g. 2359), so their arithmetic mean is not a
# valid time of day — treat those averaged columns with care.
route_airline_groups = flights.groupby(['ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'AIRLINE'])

# numeric_only=True makes the "numeric columns only" intent explicit and
# keeps the call valid on pandas >= 2.0 if a text column is ever present.
flights_g = route_airline_groups.mean(numeric_only=True)

# size() counts rows per group and is assigned by group index, so the counts
# cannot drift out of alignment with the means (no second groupby and no
# positional join).
flights_g['number_flights'] = route_airline_groups.size()

flights_g = flights_g.reset_index()
flights_g

Unnamed: 0,ORIGIN_AIRPORT,DESTINATION_AIRPORT,AIRLINE,YEAR,MONTH,DAY,DAY_OF_WEEK,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,...,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,number_flights
0,ABE,ATL,DL,2015.0,4.500000,12.866667,3.366667,626.833333,691.033333,40.200000,...,950.000000,59.333333,0.0,0.0,21.766667,0.000,33.666667,0.000000,3.900000,30
1,ABE,ATL,EV,2015.0,6.129032,14.870968,3.763441,1413.290323,1492.849462,56.763441,...,1720.677419,62.075269,0.0,0.0,9.322581,0.000,18.795699,33.956989,0.000000,93
2,ABE,ATL,OO,2015.0,4.000000,13.714286,4.357143,1600.000000,1617.785714,23.500000,...,1851.928571,35.785714,0.0,0.0,13.357143,0.000,0.857143,21.500000,0.071429,14
3,ABE,DTW,EV,2015.0,5.061069,15.557252,3.725191,1422.465649,1523.778626,66.198473,...,1689.030534,70.717557,0.0,0.0,9.267176,0.000,27.893130,33.557252,0.000000,131
4,ABE,ORD,EV,2015.0,6.248120,15.563910,3.481203,1334.037594,1424.142857,66.045113,...,1559.428571,76.781955,0.0,0.0,35.090226,0.000,15.421053,24.488722,1.781955,133
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8114,XNA,ORD,OO,2015.0,4.489796,13.530612,3.510204,1514.857143,1641.571429,78.551020,...,1833.938776,84.040816,0.0,0.0,32.489796,0.000,3.755102,47.387755,0.408163,49
8115,XNA,SFO,OO,2015.0,11.500000,13.428571,3.428571,745.000000,810.500000,34.071429,...,1060.857143,55.428571,0.0,0.0,23.571429,0.000,17.214286,0.000000,14.642857,14
8116,YAK,CDV,AS,2015.0,5.380952,14.238095,4.500000,1144.333333,1245.309524,54.309524,...,1327.047619,57.142857,0.0,0.0,4.523810,0.000,3.166667,47.642857,1.809524,42
8117,YAK,JNU,AS,2015.0,6.551724,12.793103,4.172414,1830.000000,1885.931034,38.000000,...,1971.344828,40.827586,0.0,0.0,4.448276,0.000,0.448276,35.931034,0.000000,29


In [48]:
# Save the per-(route, airline) averages without the row index.
# The file name is reproduced exactly (including the 'gruoped' spelling)
# because other code may load it by this name.
flights_g.to_csv(
    path_or_buf=PATH + '/flights_clean_gruoped_mean_with_airline.csv',
    index=False,
)

### No Airline

In [49]:
# Average every numeric column per (origin, destination) route and record how
# many flights each route contains.
route_groups = flights.groupby(['ORIGIN_AIRPORT', 'DESTINATION_AIRPORT'])

# AIRLINE is a text column that is not part of the group keys here, so a bare
# .mean() raises TypeError on pandas >= 2.0 (older versions silently dropped
# the column). numeric_only=True yields the same numeric result on both.
flights_g = route_groups.mean(numeric_only=True)

# size() counts rows per group and is assigned by group index, so the counts
# cannot drift out of alignment with the means.
flights_g['number_flights'] = route_groups.size()

flights_g = flights_g.reset_index()
flights_g

Unnamed: 0,ORIGIN_AIRPORT,DESTINATION_AIRPORT,YEAR,MONTH,DAY,DAY_OF_WEEK,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,TAXI_OUT,...,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,number_flights
0,ABE,ATL,2015.0,5.554745,14.313869,3.737226,1260.153285,1330.036496,49.737226,20.678832,...,1565.328467,58.788321,0.0,0.0,12.459854,0.000000,20.218978,25.248175,0.861314,137
1,ABE,DTW,2015.0,5.061069,15.557252,3.725191,1422.465649,1523.778626,66.198473,20.236641,...,1689.030534,70.717557,0.0,0.0,9.267176,0.000000,27.893130,33.557252,0.000000,131
2,ABE,ORD,2015.0,6.248120,15.563910,3.481203,1334.037594,1424.142857,66.045113,16.406015,...,1559.428571,76.781955,0.0,0.0,35.090226,0.000000,15.421053,24.488722,1.781955,133
3,ABI,DFW,2015.0,5.240223,14.315642,3.821229,1173.539106,1276.000000,62.237430,12.418994,...,1380.399441,69.368715,0.0,0.0,11.879888,0.128492,23.622905,20.318436,13.418994,358
4,ABQ,ATL,2015.0,7.064935,18.051948,4.116883,969.519481,1088.311688,96.454545,22.246753,...,1566.818182,100.766234,0.0,0.0,10.454545,0.000000,72.753247,14.701299,2.857143,77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4572,XNA,ORD,2015.0,5.214286,14.853821,3.664452,1199.878738,1315.624585,76.277409,18.867110,...,1506.486711,86.790698,0.0,0.0,28.114618,0.000000,24.606312,25.913621,8.156146,602
4573,XNA,SFO,2015.0,11.500000,13.428571,3.428571,745.000000,810.500000,34.071429,35.785714,...,1060.857143,55.428571,0.0,0.0,23.571429,0.000000,17.214286,0.000000,14.642857,14
4574,YAK,CDV,2015.0,5.380952,14.238095,4.500000,1144.333333,1245.309524,54.309524,8.976190,...,1327.047619,57.142857,0.0,0.0,4.523810,0.000000,3.166667,47.642857,1.809524,42
4575,YAK,JNU,2015.0,6.551724,12.793103,4.172414,1830.000000,1885.931034,38.000000,7.448276,...,1971.344828,40.827586,0.0,0.0,4.448276,0.000000,0.448276,35.931034,0.000000,29


In [50]:
# Save the per-route averages without the row index.
# The file name is reproduced exactly (including the 'gruoped' spelling)
# because other code may load it by this name.
flights_g.to_csv(
    path_or_buf=PATH + '/flights_clean_gruoped_mean.csv',
    index=False,
)

# Group by Month

### W/ Airline

In [51]:
# Average every numeric column per (origin, destination, airline, month) and
# record how many flights each group contains.
monthly_airline_groups = flights.groupby(
    ['ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'AIRLINE', 'MONTH']
)

# numeric_only=True makes the "numeric columns only" intent explicit and
# keeps the call valid on pandas >= 2.0 if a text column is ever present.
flights_g = monthly_airline_groups.mean(numeric_only=True)

# size() counts rows per group and is assigned by group index, so the counts
# cannot drift out of alignment with the means (no second groupby and no
# positional join).
flights_g['number_flights'] = monthly_airline_groups.size()

flights_g = flights_g.reset_index()
flights_g

Unnamed: 0,ORIGIN_AIRPORT,DESTINATION_AIRPORT,AIRLINE,MONTH,YEAR,DAY,DAY_OF_WEEK,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,...,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,number_flights
0,ABE,ATL,DL,1,2015.0,11.600000,3.400000,634.000000,652.400000,10.400000,...,951.400000,40.600000,0.0,0.0,29.600000,0.0,2.400000,0.000000,8.600000,5
1,ABE,ATL,DL,2,2015.0,13.000000,2.666667,632.500000,786.666667,94.166667,...,1076.333333,116.500000,0.0,0.0,29.500000,0.0,83.500000,0.000000,3.500000,6
2,ABE,ATL,DL,3,2015.0,8.857143,3.857143,640.000000,688.571429,25.714286,...,942.142857,43.142857,0.0,0.0,17.428571,0.0,22.571429,0.000000,3.142857,7
3,ABE,ATL,DL,5,2015.0,25.000000,1.000000,640.000000,638.000000,-2.000000,...,910.000000,25.000000,0.0,0.0,25.000000,0.0,0.000000,0.000000,0.000000,1
4,ABE,ATL,DL,6,2015.0,4.000000,4.000000,615.000000,612.000000,-3.000000,...,853.000000,39.000000,0.0,0.0,39.000000,0.0,0.000000,0.000000,0.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66163,YUM,PHX,OO,7,2015.0,16.000000,4.000000,1123.750000,1165.083333,31.333333,...,1277.500000,41.833333,0.0,0.0,10.666667,0.0,7.583333,23.583333,0.000000,12
66164,YUM,PHX,OO,8,2015.0,13.000000,3.588235,1244.705882,1347.235294,69.588235,...,1461.000000,73.823529,0.0,0.0,11.058824,0.0,41.235294,21.529412,0.000000,17
66165,YUM,PHX,OO,9,2015.0,14.857143,2.857143,1505.714286,1599.714286,59.714286,...,1711.857143,63.000000,0.0,0.0,10.571429,0.0,0.000000,52.428571,0.000000,7
66166,YUM,PHX,OO,11,2015.0,10.888889,4.055556,1280.000000,1331.611111,31.611111,...,1444.000000,39.333333,0.0,0.0,10.055556,0.5,8.888889,19.888889,0.000000,18


In [52]:
# Save the per-(route, airline, month) averages without the row index.
# The file name is reproduced exactly (including the 'gruoped' spelling)
# because other code may load it by this name.
flights_g.to_csv(
    path_or_buf=PATH + '/flights_clean_gruoped_month_with_airline.csv',
    index=False,
)

### No Airline

In [53]:
# Average every numeric column per (origin, destination, month) and record
# how many flights each group contains.
monthly_route_groups = flights.groupby(['ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'MONTH'])

# AIRLINE is a text column that is not part of the group keys here, so a bare
# .mean() raises TypeError on pandas >= 2.0 (older versions silently dropped
# the column). numeric_only=True yields the same numeric result on both.
flights_g = monthly_route_groups.mean(numeric_only=True)

# size() counts rows per group and is assigned by group index, so the counts
# cannot drift out of alignment with the means.
flights_g['number_flights'] = monthly_route_groups.size()

flights_g = flights_g.reset_index()

flights_g

Unnamed: 0,ORIGIN_AIRPORT,DESTINATION_AIRPORT,MONTH,YEAR,DAY,DAY_OF_WEEK,SCHEDULED_DEPARTURE,DEPARTURE_TIME,DEPARTURE_DELAY,TAXI_OUT,...,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,number_flights
0,ABE,ATL,1,2015.0,11.000000,3.769231,1194.692308,1235.384615,28.384615,32.538462,...,1504.538462,49.846154,0.0,0.0,22.769231,0.0,2.538462,21.230769,3.307692,13
1,ABE,ATL,2,2015.0,17.400000,3.800000,1099.000000,1216.500000,75.500000,27.950000,...,1470.300000,85.250000,0.0,0.0,13.450000,0.0,39.650000,31.100000,1.050000,20
2,ABE,ATL,3,2015.0,9.666667,4.000000,1172.375000,1196.291667,20.583333,23.291667,...,1446.583333,42.083333,0.0,0.0,20.833333,0.0,15.500000,4.833333,0.916667,24
3,ABE,ATL,4,2015.0,10.666667,4.500000,1566.666667,1622.166667,48.833333,14.500000,...,1847.666667,51.666667,0.0,0.0,4.333333,0.0,0.666667,46.666667,0.000000,6
4,ABE,ATL,5,2015.0,19.333333,3.500000,1453.666667,1525.000000,54.666667,15.166667,...,1748.166667,61.916667,0.0,0.0,10.250000,0.0,35.166667,16.416667,0.083333,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43322,YUM,PHX,7,2015.0,16.000000,4.000000,1123.750000,1165.083333,31.333333,13.833333,...,1277.500000,41.833333,0.0,0.0,10.666667,0.0,7.583333,23.583333,0.000000,12
43323,YUM,PHX,8,2015.0,13.000000,3.588235,1244.705882,1347.235294,69.588235,18.705882,...,1461.000000,73.823529,0.0,0.0,11.058824,0.0,41.235294,21.529412,0.000000,17
43324,YUM,PHX,9,2015.0,14.857143,2.857143,1505.714286,1599.714286,59.714286,13.000000,...,1711.857143,63.000000,0.0,0.0,10.571429,0.0,0.000000,52.428571,0.000000,7
43325,YUM,PHX,11,2015.0,10.888889,4.055556,1280.000000,1331.611111,31.611111,23.166667,...,1444.000000,39.333333,0.0,0.0,10.055556,0.5,8.888889,19.888889,0.000000,18


In [54]:
# Save the per-(route, month) averages without the row index.
# The file name is reproduced exactly (including the 'gruoped' spelling)
# because other code may load it by this name.
flights_g.to_csv(
    path_or_buf=PATH + '/flights_clean_gruoped_bymonth.csv',
    index=False,
)