# Airline Dataset

In [191]:
import pandas as pd
import numpy as np
import seaborn as sns                       #visualisation
import matplotlib.pyplot as plt             #visualisation
import math
import stats
import warnings
from sklearn.linear_model import LinearRegression
warnings.filterwarnings('ignore')

## 2. Loading the data into the data frame.

In [192]:
df = pd.read_csv("airline_data.csv")
airline_data = df
# To display the top 5 rows
airline_data.head()

Unnamed: 0,Passanger_Name,Flying_month,Route,Rating,Verified,Review_title,Review_content,Traveller_type,Class
0,Paige Boet,Jun-23,New Orleans to London,1.0,Trip Verified,The airline lost my luggage,The airline lost my luggage and was absolutely...,Solo Leisure,Economy Class
1,S Layne,Mar-23,London to Amman,1.0,Trip Verified,fully refunded by our travel insurance,"We booked on the BA website, round trip flight...",Couple Leisure,Business Class
2,E Lanewoski,Heathrow to Bodrum,Business Class,2.0,Trip Verified,no boarding drinks provided,"First time flying with BA business class, neve...",A321 neo,Solo Leisure
3,Joel Burman,Jun-23,Amman to London,4.0,Not Verified,WiFi didn't work,You can buy sandwiches and crisps but don't ex...,Solo Leisure,Economy Class
4,R Vines,London City to Ibiza,Business Class,7.0,Trip Verified,stick with economy,This is a two-for-one review covering economy ...,Embraer 190,Family Leisure


In [193]:
airline_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3580 entries, 0 to 3579
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Passanger_Name  3580 non-null   object 
 1   Flying_month    2815 non-null   object 
 2   Route           2816 non-null   object 
 3   Rating          3575 non-null   float64
 4   Verified        1270 non-null   object 
 5   Review_title    3580 non-null   object 
 6   Review_content  3580 non-null   object 
 7   Traveller_type  3580 non-null   object 
 8   Class           3579 non-null   object 
dtypes: float64(1), object(8)
memory usage: 251.8+ KB


In [194]:
airline_data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Rating,3575.0,4.79049,3.170323,1.0,2.0,4.0,8.0,10.0


# 2. Cleaning Dataset

In [195]:
percentage_null = (airline_data.isnull().sum() / len(airline_data))*100
percentage_null

Passanger_Name     0.000000
Flying_month      21.368715
Route             21.340782
Rating             0.139665
Verified          64.525140
Review_title       0.000000
Review_content     0.000000
Traveller_type     0.000000
Class              0.027933
dtype: float64

In [196]:
airline_data.isnull().sum()

Passanger_Name       0
Flying_month       765
Route              764
Rating               5
Verified          2310
Review_title         0
Review_content       0
Traveller_type       0
Class                1
dtype: int64

In [197]:
# Drop "Verified" column as it has more than 60% of null values 
airline_data.drop('Verified',axis=1, inplace = True)

In [198]:
percentage_null = (airline_data.isnull().sum() / len(airline_data))*100
percentage_null

Passanger_Name     0.000000
Flying_month      21.368715
Route             21.340782
Rating             0.139665
Review_title       0.000000
Review_content     0.000000
Traveller_type     0.000000
Class              0.027933
dtype: float64

In [199]:
airline_data.groupby('Traveller_type').count()                                                         

Unnamed: 0_level_0,Passanger_Name,Flying_month,Route,Rating,Review_title,Review_content,Class
Traveller_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
767-300,1,1,1,1,1,1,1
777,3,3,3,3,3,3,3
777-200,1,1,1,1,1,1,1
777-300,1,1,1,1,1,1,1
787,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...
Saab 2000,1,1,1,1,1,1,1
Solo Leisure,307,307,307,307,307,307,307
Various,1,1,1,1,1,1,1
boeing 787,1,1,1,1,1,1,1


### 2.1 Class: Created Class Fixed Feature

In [200]:
airline_data.groupby('Class').count()

Unnamed: 0_level_0,Passanger_Name,Flying_month,Route,Rating,Review_title,Review_content,Traveller_type
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Business,430,430,430,430,430,430,430
Business Class,183,183,183,183,183,183,183
Couple Leisure,641,641,641,641,641,641,641
Economy Class,676,675,676,676,676,676,676
Family Leisure,234,234,234,234,234,234,234
First Class,11,11,11,11,11,11,11
LHR to ORD,1,1,1,1,1,1,1
London to Malaga,1,1,1,1,1,1,1
Los Angeles to London to Paris to Rome,1,1,1,1,1,1,1
Premium Economy,85,85,85,85,85,85,85


In [201]:
#Create New Column to modify as the column could have data usefull for others
airline_data['Class_fix'] = airline_data['Class']

In [202]:
#Copy useful data from Route Column
Route_to_fix = airline_data[(airline_data['Class'] != 'Premium Economy') & (airline_data['Class'] != 'Business Class') & (airline_data['Class'] != 'First Class') & (airline_data['Class'] != 'Economy Class')]['Route']

In [203]:
#Paste Data in the new Class Column, filtering by rows without relevant information for Class
airline_data['Class_fix'][(airline_data['Class_fix'] != 'Premium Economy') & (airline_data['Class_fix'] != 'Business Class') & (airline_data['Class_fix'] != 'First Class_fix') & (airline_data['Class_fix'] != 'Economy Class')] = Route_to_fix

In [204]:
#Copy useful data from Traveller_type
Traveller_to_fix = airline_data[(airline_data['Class_fix'] != 'Premium Economy') & (airline_data['Class_fix'] != 'Business Class') & (airline_data['Class_fix'] != 'First Class') & (airline_data['Class_fix'] != 'Economy Class')]['Traveller_type']

In [205]:
#Paste Data in the new Class Column, filtering by rows without relevant information for Class
airline_data['Class_fix'][(airline_data['Class_fix'] != 'Premium Economy') & (airline_data['Class_fix'] != 'Business Class') & (airline_data['Class_fix'] != 'First Class') & (airline_data['Class_fix'] != 'Economy Class')] = Traveller_to_fix

In [206]:
#Last 13 Rows are first Class, there were a space that produced the error in that Rows, but mnually we can fix it easily
airline_data[(airline_data['Class_fix'] != 'Premium Economy') & (airline_data['Class_fix'] != 'Business Class') & (airline_data['Class_fix'] != 'First Class') & (airline_data['Class_fix'] != 'Economy Class')] = 'First Class'

In [207]:
#Corrobarting all our values are Class denominatinos
airline_data.groupby('Class_fix').count()

Unnamed: 0_level_0,Passanger_Name,Flying_month,Route,Rating,Review_title,Review_content,Traveller_type,Class
Class_fix,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Business Class,1161,867,867,1160,1161,1161,1161,1161
Economy Class,1849,1521,1522,1846,1849,1849,1849,1849
First Class,212,157,157,211,212,212,212,212
Premium Economy,358,271,271,358,358,358,358,358


In [208]:
# 2.2 Flying Month

In [209]:
### Flying Month: Created Class Fixed Feature
airline_data.groupby('Flying_month').count()

Unnamed: 0_level_0,Passanger_Name,Route,Rating,Review_title,Review_content,Traveller_type,Class,Class_fix
Flying_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ACC to ZRH via LHR,1,1,1,1,1,1,1,1
AGP to LGW,1,1,1,1,1,1,1,1
AMS to BKK via LHR,1,1,1,1,1,1,1,1
AMS to GRU via LHR,1,1,1,1,1,1,1,1
AMS to HKG via LHR,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...
doha to londonDoha to London,1,1,1,1,1,1,1,1
iAH to LHR,1,1,1,1,1,1,1,1
lgw to alicante,1,1,1,1,1,1,1,1
no,8,8,8,8,8,8,8,8


In [210]:
airline_data['Route'].fillna('None')

0       New Orleans to London
1             London to Amman
2              Business Class
3             Amman to London
4              Business Class
                ...          
3575                     None
3576                     None
3577                     None
3578                     None
3579                     None
Name: Route, Length: 3580, dtype: object

In [211]:
#Create New Column to modify as the column could have data usefull for others
airline_data['Route_fix'] = airline_data['Route']

In [212]:
#Copy useful data from Route Column
Copy_flying = airline_data[(airline_data['Route_fix'] == 'Premium Economy') | (airline_data['Route_fix'] == 'Business Class') | (
                airline_data['Route_fix'] == 'First Class') | (airline_data['Route_fix'] == 'Economy Class')]['Flying_month']

In [213]:
Copy_flying

2                Heathrow to Bodrum
4              London City to Ibiza
6               Amsterdam to London
7       London Heathrow to Kalamata
16               Santiago to London
                   ...             
2809                 Kiev to London
2811        London-Vancouver return
2812                        LHR-ORD
2815                            yes
2829                    First Class
Name: Flying_month, Length: 1871, dtype: object

In [219]:
#Paste Data in the new Class Column, filtering by rows without relevant information for Class
airline_data[(airline_data['Route_fix'] == 'Premium Economy') | (airline_data['Route_fix'] == 'Business Class') | (
                airline_data['Route_fix'] == 'First Class') | (airline_data['Route_fix'] == 'Economy Class')]['Route_fix'] = Copy_flying

In [220]:
airline_data[~airline_data['Route_fix'].str.contains('to', na=False)]

Unnamed: 0,Passanger_Name,Flying_month,Route,Rating,Review_title,Review_content,Traveller_type,Class,Class_fix,Route_fix
2,E Lanewoski,Heathrow to Bodrum,Business Class,2.0,no boarding drinks provided,"First time flying with BA business class, neve...",A321 neo,Solo Leisure,Business Class,Business Class
4,R Vines,London City to Ibiza,Business Class,7.0,stick with economy,This is a two-for-one review covering economy ...,Embraer 190,Family Leisure,Business Class,Business Class
6,C Dean,Amsterdam to London,Business Class,1.0,delays and cancellations,Having experienced delays and cancellations de...,A350,Business,Business Class,Business Class
7,Richard Hodges,London Heathrow to Kalamata,Economy Class,7.0,Economy class seating was truly dreadful,Travelled to Heathrow to Kalamata and return j...,Boeing 737,Couple Leisure,Economy Class,Economy Class
16,A Garlen,Santiago to London,Economy Class,3.0,Most uncomfortable flight,Most uncomfortable flight I have ever experien...,Boeing 787,Family Leisure,Economy Class,Economy Class
...,...,...,...,...,...,...,...,...,...,...
3575,W Benson,,,4.0,British Airways customer review,LHR-HKG on Boeing 747 - 23/08/12. Much has bee...,Economy Class,no,Economy Class,
3576,S Luqman,,,4.0,British Airways customer review,Just got back from Bridgetown Barbados flying ...,Economy Class,no,Economy Class,
3577,D Smith,,,4.0,British Airways customer review,LHR-JFK-LAX-LHR. Check in was ok apart from be...,Economy Class,no,Economy Class,
3578,W Benson,,,6.0,British Airways customer review,HKG-LHR in New Club World on Boeing 777-300 - ...,Business Class,yes,Business Class,


In [218]:
#Corrobarting all our values are Class denominatinos
airline_data.groupby('Route_fix').count()

Unnamed: 0_level_0,Passanger_Name,Flying_month,Route,Rating,Review_title,Review_content,Traveller_type,Class,Class_fix
Route_fix,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
London Heathrow to Madrid,1,1,1,1,1,1,1,1,1
ABV to LHR,1,1,1,1,1,1,1,1,1
ABZ to SFO via LHR,1,1,1,1,1,1,1,1,1
ACC to LHR,1,1,1,1,1,1,1,1,1
AGP to LGW,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...
Zagreb to London,1,1,1,1,1,1,1,1,1
Zagreb to London Heathrow,1,1,1,1,1,1,1,1,1
Zurich to London,5,5,5,5,5,5,5,5,5
Zurich to London Heathrow,2,2,2,2,2,2,2,2,2


In [137]:
airline_data

Unnamed: 0,Passanger_Name,Flying_month,Route,Rating,Review_title,Review_content,Traveller_type,Class,Class_fix,Route_fix
0,Paige Boet,Jun-23,New Orleans to London,1.0,The airline lost my luggage,The airline lost my luggage and was absolutely...,Solo Leisure,Economy Class,Economy Class,New Orleans to London
1,S Layne,Mar-23,London to Amman,1.0,fully refunded by our travel insurance,"We booked on the BA website, round trip flight...",Couple Leisure,Business Class,Business Class,London to Amman
2,E Lanewoski,Heathrow to Bodrum,Business Class,2.0,no boarding drinks provided,"First time flying with BA business class, neve...",A321 neo,Solo Leisure,Solo Leisure,Business Class
3,Joel Burman,Jun-23,Amman to London,4.0,WiFi didn't work,You can buy sandwiches and crisps but don't ex...,Solo Leisure,Economy Class,Economy Class,Amman to London
4,R Vines,London City to Ibiza,Business Class,7.0,stick with economy,This is a two-for-one review covering economy ...,Embraer 190,Family Leisure,Family Leisure,Business Class
...,...,...,...,...,...,...,...,...,...,...
3575,W Benson,,,4.0,British Airways customer review,LHR-HKG on Boeing 747 - 23/08/12. Much has bee...,Economy Class,no,no,
3576,S Luqman,,,4.0,British Airways customer review,Just got back from Bridgetown Barbados flying ...,Economy Class,no,no,
3577,D Smith,,,4.0,British Airways customer review,LHR-JFK-LAX-LHR. Check in was ok apart from be...,Economy Class,no,no,
3578,W Benson,,,6.0,British Airways customer review,HKG-LHR in New Club World on Boeing 777-300 - ...,Business Class,yes,yes,
