In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt

# Working on Ride Sharing CSV File

In [2]:
file_path_ride_sharing = '/content/drive/MyDrive/AI/Data Analysis/DA with Python/ride_sharing_new.csv'

In [3]:
ride_sharing = pd.read_csv(file_path_ride_sharing, delimiter=',')
ride_sharing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25760 entries, 0 to 25759
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Unnamed: 0       25760 non-null  int64 
 1   duration         25760 non-null  object
 2   station_A_id     25760 non-null  int64 
 3   station_A_name   25760 non-null  object
 4   station_B_id     25760 non-null  int64 
 5   station_B_name   25760 non-null  object
 6   bike_id          25760 non-null  int64 
 7   user_type        25760 non-null  int64 
 8   user_birth_year  25760 non-null  int64 
 9   user_gender      25760 non-null  object
dtypes: int64(6), object(4)
memory usage: 2.0+ MB


In [4]:
ride_sharing.describe()

Unnamed: 0.1,Unnamed: 0,station_A_id,station_B_id,bike_id,user_type,user_birth_year
count,25760.0,25760.0,25760.0,25760.0,25760.0,25760.0
mean,12879.5,31.023602,89.558579,4107.621467,2.008385,1983.054969
std,7436.415803,26.409263,105.144103,1576.315767,0.704541,10.010992
min,0.0,3.0,3.0,11.0,1.0,1901.0
25%,6439.75,15.0,21.0,3106.0,2.0,1978.0
50%,12879.5,21.0,58.0,4821.0,2.0,1985.0
75%,19319.25,67.0,93.0,5257.0,3.0,1990.0
max,25759.0,81.0,383.0,6638.0,3.0,2001.0


In [5]:
ride_sharing

Unnamed: 0.1,Unnamed: 0,duration,station_A_id,station_A_name,station_B_id,station_B_name,bike_id,user_type,user_birth_year,user_gender
0,0,12 minutes,81,Berry St at 4th St,323,Broadway at Kearny,5480,2,1959,Male
1,1,24 minutes,3,Powell St BART Station (Market St at 4th St),118,Eureka Valley Recreation Center,5193,2,1965,Male
2,2,8 minutes,67,San Francisco Caltrain Station 2 (Townsend St...,23,The Embarcadero at Steuart St,3652,3,1993,Male
3,3,4 minutes,16,Steuart St at Market St,28,The Embarcadero at Bryant St,1883,1,1979,Male
4,4,11 minutes,22,Howard St at Beale St,350,8th St at Brannan St,4626,2,1994,Male
...,...,...,...,...,...,...,...,...,...,...
25755,25755,11 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,34,Father Alfred E Boeddeker Park,5063,1,2000,Male
25756,25756,10 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,34,Father Alfred E Boeddeker Park,5411,2,1998,Male
25757,25757,14 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,42,San Francisco City Hall (Polk St at Grove St),5157,2,1995,Male
25758,25758,14 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,42,San Francisco City Hall (Polk St at Grove St),4438,2,1995,Male


## Converting Duration column into integer

In [6]:
ride_sharing['duration_trim'] = ride_sharing['duration'].str.strip('minutes')
ride_sharing['duration_trim']

0        12 
1        24 
2         8 
3         4 
4        11 
        ... 
25755    11 
25756    10 
25757    14 
25758    14 
25759    29 
Name: duration_trim, Length: 25760, dtype: object

In [7]:
ride_sharing['duration_time'] = ride_sharing['duration_trim'].astype('int')
ride_sharing['duration_time']

0        12
1        24
2         8
3         4
4        11
         ..
25755    11
25756    10
25757    14
25758    14
25759    29
Name: duration_time, Length: 25760, dtype: int64

In [8]:
assert ride_sharing['duration_time'].dtype == int

In [9]:
ride_sharing[['duration', 'duration_trim', 'duration_time']]

Unnamed: 0,duration,duration_trim,duration_time
0,12 minutes,12,12
1,24 minutes,24,24
2,8 minutes,8,8
3,4 minutes,4,4
4,11 minutes,11,11
...,...,...,...
25755,11 minutes,11,11
25756,10 minutes,10,10
25757,14 minutes,14,14
25758,14 minutes,14,14


In [10]:
ride_sharing['duration_time'].mean()

11.389052795031056

## Converting Usertype column into Category

In [11]:
ride_sharing['user_type']

0        2
1        2
2        3
3        1
4        2
        ..
25755    1
25756    2
25757    2
25758    2
25759    3
Name: user_type, Length: 25760, dtype: int64

In [12]:
ride_sharing['user_type'].unique()

array([2, 3, 1])

In [13]:
ride_sharing['user_type'] = ride_sharing['user_type'].astype(int)
ride_sharing.loc[ride_sharing['user_type'] > 3, 'user_type'] = 0
ride_sharing['user_type'].unique()

array([2, 3, 1])

In [14]:
ride_sharing['user_type'].dtype

dtype('int64')

In [15]:
ride_sharing['user_type'] = ride_sharing['user_type'].astype('category')
ride_sharing['user_type'].dtype

CategoricalDtype(categories=[1, 2, 3], ordered=False)

## Converting birth_year column into date time

In [16]:
ride_sharing['user_birth_year'].dtype

dtype('int64')

In [17]:
ride_sharing['user_birth_year'] = pd.to_datetime(ride_sharing['user_birth_year'])
ride_sharing['user_birth_year'].dtype

dtype('<M8[ns]')

In [18]:
# Get today date
today_date = dt.date.today()
today_date

# Get Max time of the day
max_time = dt.datetime.max.time()
max_time

# Combine both
combine_date_time = dt.datetime.combine(today_date, max_time)
print(combine_date_time)

ride_sharing.loc[ride_sharing['user_birth_year'] > combine_date_time, 'user_birth_year'] = today_date
print(ride_sharing['user_birth_year'].max())


2021-09-29 23:59:59.999999
1970-01-01 00:00:00.000002001


## Finding Duplicates

In [19]:
duplicates = ride_sharing.duplicated(subset='bike_id', keep=False)
duplicates.unique(), duplicates.value_counts()

(array([ True, False]), True     25717
 False       43
 dtype: int64)

In [20]:
duplicates

0        True
1        True
2        True
3        True
4        True
         ... 
25755    True
25756    True
25757    True
25758    True
25759    True
Length: 25760, dtype: bool

In [21]:
duplicated_rides = ride_sharing[duplicates].sort_values('bike_id')
duplicated_rides

Unnamed: 0.1,Unnamed: 0,duration,station_A_id,station_A_name,station_B_id,station_B_name,bike_id,user_type,user_birth_year,user_gender,duration_trim,duration_time
3638,3638,12 minutes,22,Howard St at Beale St,350,8th St at Brannan St,11,1,1970-01-01 00:00:00.000001988,Female,12,12
6088,6088,5 minutes,67,San Francisco Caltrain Station 2 (Townsend St...,63,Bryant St at 6th St,11,2,1970-01-01 00:00:00.000001985,Male,5,5
10857,10857,4 minutes,3,Powell St BART Station (Market St at 4th St),47,4th St at Harrison St,11,1,1970-01-01 00:00:00.000001987,Male,4,4
10045,10045,13 minutes,3,Powell St BART Station (Market St at 4th St),112,Harrison St at 17th St,27,3,1970-01-01 00:00:00.000001989,Male,13,13
16104,16104,10 minutes,22,Howard St at Beale St,30,San Francisco Caltrain (Townsend St at 4th St),27,2,1970-01-01 00:00:00.000001970,Male,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...
8812,8812,10 minutes,5,Powell St BART Station (Market St at 5th St),345,Hubbell St at 16th St,6638,2,1970-01-01 00:00:00.000001986,Female,10,10
6815,6815,5 minutes,21,Montgomery St BART Station (Market St at 2nd St),343,Bryant St at 2nd St,6638,2,1970-01-01 00:00:00.000001995,Female,5,5
8456,8456,7 minutes,67,San Francisco Caltrain Station 2 (Townsend St...,19,Post St at Kearny St,6638,1,1970-01-01 00:00:00.000001983,Male,7,7
8300,8300,6 minutes,16,Steuart St at Market St,36,Folsom St at 3rd St,6638,2,1970-01-01 00:00:00.000001962,Male,6,6


In [22]:
duplicated_rides[['bike_id', 'duration', 'user_birth_year']]

Unnamed: 0,bike_id,duration,user_birth_year
3638,11,12 minutes,1970-01-01 00:00:00.000001988
6088,11,5 minutes,1970-01-01 00:00:00.000001985
10857,11,4 minutes,1970-01-01 00:00:00.000001987
10045,27,13 minutes,1970-01-01 00:00:00.000001989
16104,27,10 minutes,1970-01-01 00:00:00.000001970
...,...,...,...
8812,6638,10 minutes,1970-01-01 00:00:00.000001986
6815,6638,5 minutes,1970-01-01 00:00:00.000001995
8456,6638,7 minutes,1970-01-01 00:00:00.000001983
8300,6638,6 minutes,1970-01-01 00:00:00.000001962


## Dropping Complet Duplicated from Ride Sharing

In [23]:
ride_drop_duplicate = ride_sharing.drop_duplicates()
ride_drop_duplicate

Unnamed: 0.1,Unnamed: 0,duration,station_A_id,station_A_name,station_B_id,station_B_name,bike_id,user_type,user_birth_year,user_gender,duration_trim,duration_time
0,0,12 minutes,81,Berry St at 4th St,323,Broadway at Kearny,5480,2,1970-01-01 00:00:00.000001959,Male,12,12
1,1,24 minutes,3,Powell St BART Station (Market St at 4th St),118,Eureka Valley Recreation Center,5193,2,1970-01-01 00:00:00.000001965,Male,24,24
2,2,8 minutes,67,San Francisco Caltrain Station 2 (Townsend St...,23,The Embarcadero at Steuart St,3652,3,1970-01-01 00:00:00.000001993,Male,8,8
3,3,4 minutes,16,Steuart St at Market St,28,The Embarcadero at Bryant St,1883,1,1970-01-01 00:00:00.000001979,Male,4,4
4,4,11 minutes,22,Howard St at Beale St,350,8th St at Brannan St,4626,2,1970-01-01 00:00:00.000001994,Male,11,11
...,...,...,...,...,...,...,...,...,...,...,...,...
25755,25755,11 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,34,Father Alfred E Boeddeker Park,5063,1,1970-01-01 00:00:00.000002,Male,11,11
25756,25756,10 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,34,Father Alfred E Boeddeker Park,5411,2,1970-01-01 00:00:00.000001998,Male,10,10
25757,25757,14 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,42,San Francisco City Hall (Polk St at Grove St),5157,2,1970-01-01 00:00:00.000001995,Male,14,14
25758,25758,14 minutes,15,San Francisco Ferry Building (Harry Bridges Pl...,42,San Francisco City Hall (Polk St at Grove St),4438,2,1970-01-01 00:00:00.000001995,Male,14,14


In [24]:
ride_drop_duplicate[['bike_id', 'duration', 'user_birth_year']].sort_values('bike_id')

Unnamed: 0,bike_id,duration,user_birth_year
10857,11,4 minutes,1970-01-01 00:00:00.000001987
6088,11,5 minutes,1970-01-01 00:00:00.000001985
3638,11,12 minutes,1970-01-01 00:00:00.000001988
3431,27,10 minutes,1970-01-01 00:00:00.000001988
2747,27,6 minutes,1970-01-01 00:00:00.000001992
...,...,...,...
8456,6638,7 minutes,1970-01-01 00:00:00.000001983
8812,6638,10 minutes,1970-01-01 00:00:00.000001986
6815,6638,5 minutes,1970-01-01 00:00:00.000001995
8380,6638,8 minutes,1970-01-01 00:00:00.000001984


In [25]:
ride_uniq = ride_drop_duplicate.groupby('bike_id')
print(ride_uniq)
ride_uniq.describe()

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f9ee775d650>


Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,station_A_id,station_A_id,station_A_id,station_A_id,station_A_id,station_A_id,station_A_id,station_A_id,station_B_id,station_B_id,station_B_id,station_B_id,station_B_id,station_B_id,station_B_id,station_B_id,duration_time,duration_time,duration_time,duration_time,duration_time,duration_time,duration_time,duration_time
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
bike_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2
11,3.0,6861.000000,3671.053936,3638.0,4863.00,6088.0,8472.50,10857.0,3.0,30.666667,32.868425,3.0,12.50,22.0,44.50,67.0,3.0,153.333333,170.506109,47.0,55.00,63.0,206.50,350.0,3.0,7.000000,4.358899,4.0,4.50,5.0,8.50,12.0
27,18.0,13404.722222,7910.772364,888.0,5521.75,15097.0,18372.25,25307.0,18.0,28.555556,23.121730,3.0,15.00,22.0,30.00,67.0,18.0,68.333333,39.030908,15.0,30.75,65.5,99.75,126.0,18.0,12.500000,4.655800,6.0,10.00,11.0,15.25,25.0
37,12.0,10026.583333,5629.998990,3968.0,4978.75,8613.0,14530.75,20294.0,12.0,27.500000,24.088851,3.0,13.25,22.0,30.00,81.0,12.0,61.833333,94.330978,5.0,20.50,29.0,55.25,350.0,12.0,7.000000,3.074824,2.0,5.75,6.5,8.00,13.0
38,9.0,14801.555556,7790.238830,5768.0,7464.00,14642.0,19540.00,25650.0,9.0,36.555556,30.553687,5.0,15.00,22.0,67.00,81.0,9.0,84.444444,117.218931,6.0,24.00,50.0,81.00,383.0,9.0,8.666667,4.000000,4.0,6.00,9.0,10.00,17.0
43,7.0,9310.714286,7251.762124,2416.0,3397.50,4930.0,16876.50,17281.0,7.0,37.142857,33.228645,3.0,10.00,22.0,67.00,81.0,7.0,73.142857,95.361468,14.0,19.00,55.0,60.50,284.0,7.0,12.285714,3.093773,9.0,9.50,12.0,15.00,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6147,18.0,17980.055556,5176.177780,6688.0,12682.25,21004.0,21718.75,23344.0,18.0,31.277778,27.822559,3.0,15.00,21.0,57.75,81.0,18.0,132.500000,133.011388,6.0,35.00,66.5,246.00,356.0,18.0,9.888889,3.998366,4.0,7.00,9.5,11.75,18.0
6234,19.0,12927.631579,8611.706440,1823.0,6169.50,10519.0,21292.50,24669.0,19.0,30.421053,26.986243,3.0,15.00,21.0,48.50,81.0,19.0,154.000000,140.747212,6.0,50.00,93.0,304.00,383.0,19.0,16.789474,31.302430,2.0,5.00,8.0,13.00,142.0
6235,40.0,12172.250000,7277.186579,130.0,6154.00,14189.5,18168.75,21801.0,40.0,37.350000,30.335348,3.0,15.00,22.0,67.00,81.0,40.0,106.475000,118.956572,3.0,20.00,67.0,111.00,375.0,40.0,8.550000,4.373287,1.0,5.00,9.0,10.25,18.0
6315,1.0,20831.000000,,20831.0,20831.00,20831.0,20831.00,20831.0,1.0,21.000000,,21.0,21.00,21.0,21.00,21.0,1.0,50.000000,,50.0,50.00,50.0,50.00,50.0,1.0,7.000000,,7.0,7.00,7.0,7.00,7.0


# Working on Airlines_final CSV File

In [26]:
file_path_airline = '/content/drive/MyDrive/AI/Data Analysis/DA with Python/airlines_final.csv'

In [27]:
airlines = pd.read_csv(file_path_airline)
airlines

Unnamed: 0.1,Unnamed: 0,id,day,airline,destination,dest_region,dest_size,boarding_area,dept_time,wait_min,cleanliness,safety,satisfaction
0,0,1351,Tuesday,UNITED INTL,KANSAI,Asia,Hub,Gates 91-102,2018-12-31,115.0,Clean,Neutral,Very satisfied
1,1,373,Friday,ALASKA,SAN JOSE DEL CABO,Canada/Mexico,Small,Gates 50-59,2018-12-31,135.0,Clean,Very safe,Very satisfied
2,2,2820,Thursday,DELTA,LOS ANGELES,West US,Hub,Gates 40-48,2018-12-31,70.0,Average,Somewhat safe,Neutral
3,3,1157,Tuesday,SOUTHWEST,LOS ANGELES,West US,Hub,Gates 20-39,2018-12-31,190.0,Clean,Very safe,Somewhat satsified
4,4,2992,Wednesday,AMERICAN,MIAMI,East US,Hub,Gates 50-59,2018-12-31,559.0,Somewhat clean,Very safe,Somewhat satsified
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2472,2804,1475,Tuesday,ALASKA,NEW YORK-JFK,East US,Hub,Gates 50-59,2018-12-31,280.0,Somewhat clean,Neutral,Somewhat satsified
2473,2805,2222,Thursday,SOUTHWEST,PHOENIX,West US,Hub,Gates 20-39,2018-12-31,165.0,Clean,Very safe,Very satisfied
2474,2806,2684,Friday,UNITED,ORLANDO,East US,Hub,Gates 70-90,2018-12-31,92.0,Clean,Very safe,Very satisfied
2475,2807,2549,Tuesday,JETBLUE,LONG BEACH,West US,Small,Gates 1-12,2018-12-31,95.0,Clean,Somewhat safe,Very satisfied


In [28]:
airlines.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2477 entries, 0 to 2476
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     2477 non-null   int64  
 1   id             2477 non-null   int64  
 2   day            2477 non-null   object 
 3   airline        2477 non-null   object 
 4   destination    2477 non-null   object 
 5   dest_region    2477 non-null   object 
 6   dest_size      2477 non-null   object 
 7   boarding_area  2477 non-null   object 
 8   dept_time      2477 non-null   object 
 9   wait_min       2477 non-null   float64
 10  cleanliness    2477 non-null   object 
 11  safety         2477 non-null   object 
 12  satisfaction   2477 non-null   object 
dtypes: float64(1), int64(2), object(10)
memory usage: 251.7+ KB


In [29]:
airlines.columns

Index(['Unnamed: 0', 'id', 'day', 'airline', 'destination', 'dest_region',
       'dest_size', 'boarding_area', 'dept_time', 'wait_min', 'cleanliness',
       'safety', 'satisfaction'],
      dtype='object')

### Print Uniq values of Survey columns

In [31]:
print("Cleanliness: ", airlines['cleanliness'].unique())
print("Safety: ", airlines['safety'].unique())
print("Satisfaction: ", airlines['satisfaction'].unique())

Cleanliness:  ['Clean' 'Average' 'Somewhat clean' 'Somewhat dirty' 'Dirty']
Safety:  ['Neutral' 'Very safe' 'Somewhat safe' 'Very unsafe' 'Somewhat unsafe']
Satisfaction:  ['Very satisfied' 'Neutral' 'Somewhat satsified' 'Somewhat unsatisfied'
 'Very unsatisfied']


# Print Uniq values of Destination region and size

In [33]:
print("Desination Region: ", airlines['dest_region'].unique())
print("Destination Size: ", airlines['dest_size'].unique())

Desination Region:  ['Asia' 'Canada/Mexico' 'West US' 'East US' 'Midwest US' 'EAST US'
 'Middle East' 'Europe' 'eur' 'Central/South America'
 'Australia/New Zealand' 'middle east']
Destination Size:  ['Hub' 'Small' '    Hub' 'Medium' 'Large' 'Hub     ' '    Small'
 'Medium     ' '    Medium' 'Small     ' '    Large' 'Large     ']


### Converting Wait Stay time to Short, Medium, Long

In [34]:
label_ranges = [0, 60, 180, np.inf]
label_names = ['short', 'medium', 'long']

airlines['wait_type'] = pd.cut(airlines['wait_min'],
                               bins=label_ranges,
                               labels=label_names)
airlines['wait_type']

0       medium
1       medium
2       medium
3         long
4         long
         ...  
2472      long
2473    medium
2474    medium
2475    medium
2476      long
Name: wait_type, Length: 2477, dtype: category
Categories (3, object): ['short' < 'medium' < 'long']

### Converting Days into Weekday or Weekend

In [35]:
airlines['day'].unique()

array(['Tuesday', 'Friday', 'Thursday', 'Wednesday', 'Saturday', 'Sunday',
       'Monday'], dtype=object)

In [38]:
days_mapping = {'Monday':'weekday', 'Tuesday':'weekday', 'Wednesday':'weekday', 'Thursday':'weekday',
                'Friday':'weekday', 'Saturday':'weekend', 'Sunday':'weekend'}

airlines['day_week'] = airlines['day'].replace(days_mapping)
airlines['day_week']

0       weekday
1       weekday
2       weekday
3       weekday
4       weekday
         ...   
2472    weekday
2473    weekday
2474    weekday
2475    weekday
2476    weekend
Name: day_week, Length: 2477, dtype: object

In [39]:
airlines['day_week'].unique()

array(['weekday', 'weekend'], dtype=object)

In [40]:
airlines['day_week'].value_counts()

weekday    2000
weekend     477
Name: day_week, dtype: int64

In [41]:
assert airlines['day_week'].str.contains('Ms.|Mr.|Miss|Dr.').any() == False

In [42]:
airlines

Unnamed: 0.1,Unnamed: 0,id,day,airline,destination,dest_region,dest_size,boarding_area,dept_time,wait_min,cleanliness,safety,satisfaction,wait_type,day_week
0,0,1351,Tuesday,UNITED INTL,KANSAI,Asia,Hub,Gates 91-102,2018-12-31,115.0,Clean,Neutral,Very satisfied,medium,weekday
1,1,373,Friday,ALASKA,SAN JOSE DEL CABO,Canada/Mexico,Small,Gates 50-59,2018-12-31,135.0,Clean,Very safe,Very satisfied,medium,weekday
2,2,2820,Thursday,DELTA,LOS ANGELES,West US,Hub,Gates 40-48,2018-12-31,70.0,Average,Somewhat safe,Neutral,medium,weekday
3,3,1157,Tuesday,SOUTHWEST,LOS ANGELES,West US,Hub,Gates 20-39,2018-12-31,190.0,Clean,Very safe,Somewhat satsified,long,weekday
4,4,2992,Wednesday,AMERICAN,MIAMI,East US,Hub,Gates 50-59,2018-12-31,559.0,Somewhat clean,Very safe,Somewhat satsified,long,weekday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2472,2804,1475,Tuesday,ALASKA,NEW YORK-JFK,East US,Hub,Gates 50-59,2018-12-31,280.0,Somewhat clean,Neutral,Somewhat satsified,long,weekday
2473,2805,2222,Thursday,SOUTHWEST,PHOENIX,West US,Hub,Gates 20-39,2018-12-31,165.0,Clean,Very safe,Very satisfied,medium,weekday
2474,2806,2684,Friday,UNITED,ORLANDO,East US,Hub,Gates 70-90,2018-12-31,92.0,Clean,Very safe,Very satisfied,medium,weekday
2475,2807,2549,Tuesday,JETBLUE,LONG BEACH,West US,Small,Gates 1-12,2018-12-31,95.0,Clean,Somewhat safe,Very satisfied,medium,weekday
