In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Load the raw airline review dataset from the local CSV file.
data = pd.read_csv(filepath_or_buffer='./data/airline.csv')

In [4]:
# Print a summary of the dataset: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41396 entries, 0 to 41395
Data columns (total 20 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   airline_name                   41396 non-null  object 
 1   link                           41396 non-null  object 
 2   title                          41396 non-null  object 
 3   author                         41396 non-null  object 
 4   author_country                 39805 non-null  object 
 5   date                           41396 non-null  object 
 6   content                        41396 non-null  object 
 7   aircraft                       1278 non-null   object 
 8   type_traveller                 2378 non-null   object 
 9   cabin_flown                    38520 non-null  object 
 10  route                          2341 non-null   object 
 11  overall_rating                 36861 non-null  float64
 12  seat_comfort_rating            33706 non-null 

In [5]:
# List the distinct values of the "airline_name" column.
data["airline_name"].unique()

array(['adria-airways', 'aegean-airlines', 'aer-lingus', 'aerocaribbean',
       'aeroflot-russian-airlines', 'aerolineas-argentinas', 'aeromexico',
       'aerosur', 'afriqiyah-airways', 'aigle-azur', 'air-algerie',
       'air-arabia', 'air-astana', 'air-austral', 'air-bagan',
       'air-berlin', 'air-botswana', 'air-busan', 'air-cairo-user',
       'air-canada', 'air-canada-rouge', 'air-caraibes', 'air-china',
       'air-corsica', 'air-dolomiti', 'air-europa', 'air-france',
       'air-greenland', 'air-india', 'air-india-express', 'air-koryo',
       'air-labrador', 'air-macau', 'air-madagascar', 'air-malawi',
       'air-malta', 'air-mauritius', 'air-mediterranee', 'air-memphis',
       'air-moldova', 'air-namibia', 'air-new-zealand', 'air-niugini',
       'air-north-yukons-airline', 'air-nostrum', 'air-serbia',
       'air-seychelles', 'air-tahiti-nui', 'air-transat', 'air-vanuata',
       'air-zimbabwe', 'airasia', 'airasia-x', 'airasia-zest',
       'airbaltic', 'air-blue', 'a

In [10]:
data["airline_name"].value_counts() <

spirit-airlines            False
british-airways            False
united-airlines            False
jet-airways                False
air-canada-rouge           False
                           ...  
passaredo-linhas-aereas     True
tropic-air-belize           True
petroleum-air-services      True
europe-airpost              True
vanilla-air                 True
Name: airline_name, Length: 362, dtype: bool

# Analysiere Kundenbewertungen der Swiss im Zeitraum von 2012 bis 2015

In den Jahren 2012–2015 betrug die jährliche Zahl der Fluggäste etwa 17 Mio. (Quelle: https://de.statista.com/statistik/daten/studie/291651/umfrage/fluggaeste-von-swiss/)

In [60]:
# Restrict the reviews to SWISS International Air Lines only.
swiss = data.loc[data["airline_name"].eq("swiss-international-air-lines")]

## Preprocessing

### Data Cleaning

Zuerst werden alle Spalten entfernt, die keinen (unmittelbaren) Einfluss auf die Bewertung des Fluggastes haben:
* **link**: Link zur Datenquelle (irrelevant) -> **drop**
* **author**: Name des Autors (irrelevant) -> **drop**
* **author_country**: Herkunftsland des Autors (wird hier nicht ausgewertet) -> **drop**
* **airline_name**: Für alle Datenpunkte gleich, da nur Swiss betrachtet wird -> **drop**
* **title**: Auch für alle Datenpunkte gleich -> **drop**
* **aircraft**: Leider nur für 14 der 336 Datenpunkte erfasst. Der Flugzeugtyp kann die Kundenzufriedenheit prinzipiell beeinflussen (Beispiel: Der A380 wird von Fluggästen sehr positiv wahrgenommen) und könnte aus anderen Datenquellen ergänzt werden, wenn Flugnummer und/oder Flugsegmente bekannt sind (Flightradar24, interne Datenbank) -> hier aber **drop**


In [61]:
# Remove the columns that are not used in the rating analysis. Note that
# "author_country" is dropped here as well.
# NOTE(review): the result is assigned back to `swiss`, so running this cell a
# second time raises a KeyError because the columns are already gone.
swiss = swiss.drop(
    columns=[
        "link",
        "author",
        "airline_name",
        "title",
        "author_country",
        "aircraft",
    ]
)

In [90]:
# Show the reviews that have no overall rating. `isna()` already yields a
# boolean mask, so it is used directly instead of being compared to True.
swiss[swiss["overall_rating"].isna()]

Unnamed: 0,date,content,type_traveller,cabin_flown,route,overall_rating,seat_comfort_rating,cabin_staff_rating,food_beverages_rating,inflight_entertainment_rating,ground_service_rating,wifi_connectivity_rating,value_money_rating,recommended
34352,2014-10-28,I've booked a J class ticket with LX from ZRH-...,,Business Class,,,0.0,1.0,0.0,0.0,,,1.0,0
34431,2014-08-20,We paid for 3 seats in Business Class and I ca...,,Business Class,,,2.0,1.0,3.0,2.0,,,1.0,0
34439,2014-08-03,26th June my colleague and I were flying from ...,,Economy,,,1.0,1.0,0.0,0.0,,,1.0,0
34454,2014-07-29,July 24 Geneva-London (City). Breaking system ...,,Economy,,,3.0,2.0,2.0,1.0,,,1.0,0
34476,2014-07-17,ZRH-NCE 08.55am departure 28 June 2014. Select...,,Economy,,,2.0,1.0,1.0,1.0,,,1.0,0
34496,2014-07-08,My wife and I booked a flight on the Edelweiss...,,Economy,,,2.0,1.0,1.0,4.0,,,2.0,0
34669,2014-02-07,Copenhagen to Hong Kong via Zurich. Flight out...,,Economy,,,3.0,1.0,3.0,3.0,,,2.0,0
34768,2013-12-13,Flew from London City to Sao Paulo via Zurich ...,,Business Class,,,3.0,1.0,1.0,1.0,,,1.0,0
34769,2013-12-12,SIN-ZRH with my husband and two small children...,,Economy,,,3.0,1.0,1.0,2.0,,,3.0,0
34794,2013-11-22,I travel about 100000 miles per year. This was...,,Economy,,,4.0,1.0,4.0,0.0,,,1.0,0


In [63]:
# Display the SWISS review frame as the cell output.
swiss

Unnamed: 0,date,content,type_traveller,cabin_flown,route,overall_rating,seat_comfort_rating,cabin_staff_rating,food_beverages_rating,inflight_entertainment_rating,ground_service_rating,wifi_connectivity_rating,value_money_rating,recommended
34129,2015-07-31,A very smooth flight! Pilots kept passengers u...,FamilyLeisure,Economy,Zurich to Dubai,9.0,5.0,5.0,5.0,4.0,5.0,,5.0,1
34132,2015-07-27,Brussels to Barcelona via Zurich. Brussels - Z...,Solo Leisure,Business Class,Brussels to Barcelona via Zurich,9.0,4.0,5.0,5.0,,5.0,,5.0,1
34133,2015-07-25,ORD-DXB via ZRH. Seat was good. Food was amazi...,FamilyLeisure,Economy,ORD to DXB via ZRH,8.0,5.0,4.0,4.0,4.0,4.0,3.0,5.0,1
34137,2015-07-23,Malta to London City via Zurich. The first lag...,Couple Leisure,Economy,Malta to LCY via Zurich,7.0,4.0,4.0,4.0,,3.0,,4.0,1
34138,2015-07-17,Let me keep this short and to the point. Bangk...,Couple Leisure,Economy,Bangkok to Berlin via Zurich,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35459,2012-08-30,HKG-ZRH-ATH flew business class seats 5A and B...,,Business Class,,10.0,5.0,5.0,5.0,3.0,,,5.0,1
35466,2012-08-30,Travelling from NCE-ATH-NCE for the best part ...,,Economy,,8.0,4.0,5.0,2.0,0.0,,,5.0,1
35467,2012-08-28,I have flown a few times with Swiss between Lo...,,Economy,,10.0,5.0,5.0,5.0,4.0,,,4.0,1
35472,2012-08-23,Flight Hong Kong to Zurich. A night flight and...,,Business Class,,9.0,5.0,5.0,4.0,4.0,,,5.0,1


### Fehlende Werte