In [1]:
import os

import pandas as pd
import statsmodels.formula.api as smf
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, SentimentOptions

In [2]:
# Setting up the IBM Watson NLU service
# The API key is read from the environment so that it is never stored in the
# notebook itself (notebooks are routinely shared and committed).
#   NATURAL_LANGUAGE_UNDERSTANDING_APIKEY - IAM API key (required)
#   NATURAL_LANGUAGE_UNDERSTANDING_URL    - service URL (optional; defaults to
#                                           the instance URL used originally)
# NOTE(review): a key was previously embedded in this cell; it should be
# treated as exposed and rotated.
watson_api_key = os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_APIKEY')
if not watson_api_key:
    raise RuntimeError(
        'Set the NATURAL_LANGUAGE_UNDERSTANDING_APIKEY environment variable '
        'to your IBM Watson NLU API key before running this notebook.'
    )
watson_service_url = os.environ.get(
    'NATURAL_LANGUAGE_UNDERSTANDING_URL',
    'https://api.eu-gb.natural-language-understanding.watson.cloud.ibm.com/instances/d255cf53-8629-4af0-9ecd-560ec0047a3e',
)

authenticator = IAMAuthenticator(watson_api_key)
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2022-04-07',
    authenticator=authenticator)
natural_language_understanding.set_service_url(watson_service_url)

In [3]:
# Load the six airline review files, trying different encodings per file.
def _read_reviews_csv(path):
    """Read one CSV as UTF-8, falling back to ISO-8859-1 for that file only.

    The fallback is decided per file: a single non-UTF-8 file must not force
    the other (valid UTF-8) files to be decoded as ISO-8859-1, which would
    silently corrupt their non-ASCII characters.
    """
    try:
        return pd.read_csv(path, encoding='utf-8')
    except UnicodeDecodeError:
        return pd.read_csv(path, encoding='ISO-8859-1')


data_1 = _read_reviews_csv('easyJet.csv')
data_2 = _read_reviews_csv('ryanair.csv')
data_3 = _read_reviews_csv('wizz-air.csv')
data_4 = _read_reviews_csv('airfrance.csv')
data_5 = _read_reviews_csv('british.csv')
data_6 = _read_reviews_csv('lufthansa.csv')

In [4]:
# Preview the first rows of data_6 (loaded from lufthansa.csv) to inspect its columns.
data_6.head()

Unnamed: 0,status,aircraft,travel_type,travel_class,route,date,seating_comfort,staff_service,food_quality,entertainment,wifi,ground_service,value_for_money,recommended,overall_rating,review
0,✅ Trip Verified,A320,Couple Leisure,Economy Class,Rome to Frankfurt,Jun-24,2.0,5.0,,,,2.0,1,no,1,"I paid for emergency exit seats, Lufthansa c..."
1,✅ Trip Verified,A320,Family Leisure,Economy Class,Venice to Frankfurt,Jul-24,4.0,1.0,1.0,,3.0,2.0,2,no,2,Flight was delayed by about 1 hour. An excus...
2,✅ Trip Verified,,Couple Leisure,Economy Class,Munich to Malaga,Jul-24,1.0,1.0,,,,1.0,1,no,1,Flight cancelled due to a delay leading to th...
3,✅ Trip Verified,,Business,Economy Class,Palermo to Hamburg,Jun-24,3.0,4.0,,,,1.0,4,no,3,Our recent experience with Lufthansa has been...
4,✅ Trip Verified,,Couple Leisure,Economy Class,Istanbul to New York via Frankfurt,Jun-24,2.0,4.0,1.0,1.0,1.0,2.0,2,no,2,I stopped flying Lufthansa 15 years ago and n...


In [4]:
# Defining IBM Watson NLU sentiment analysis functions
def watson_sentiment(review):
    """Return the document-level Watson NLU sentiment for one review.

    Parameters
    ----------
    review : str
        Review text to analyse.

    Returns
    -------
    tuple
        ``(label, score)`` read from ``response['sentiment']['document']``,
        or ``(None, None)`` when ``review`` is not a non-blank string or the
        request/parsing fails.

    Notes
    -----
    Errors are printed and swallowed on purpose so that one failing review
    does not abort a whole ``Series.apply`` run over a dataset.
    """
    # Missing values (NaN/None) and blank strings carry no text to analyse;
    # skip the network round-trip instead of sending a request that is
    # expected to fail.
    if not isinstance(review, str) or not review.strip():
        return None, None

    try:
        response = natural_language_understanding.analyze(
            text=review,
            features=Features(sentiment=SentimentOptions())
        ).get_result()
        document_sentiment = response['sentiment']['document']
        return document_sentiment['label'], document_sentiment['score']
    except Exception as e:
        # Best-effort: report the failing review and keep going.
        print(f"Error analyzing review: {review}")
        print(e)
        return None, None

In [6]:
# Applying IBM Watson NLU sentiment analysis: easyJet
# Score every review once, then split the (label, score) pairs into two
# columns. Collecting into a list first (rather than unpacking zip(*...))
# also works for a frame with no rows, where the unpack would raise ValueError.
easyjet_results = data_1['review'].apply(watson_sentiment).tolist()
data_1['watson_sentiment'] = [label for label, _ in easyjet_results]
data_1['watson_score'] = [score for _, score in easyjet_results]

# Save the results to a new file
data_1.to_csv('easyJet_with_sentiment.csv', index=False, encoding='utf-8-sig')

print(data_1[['review', 'watson_sentiment', 'watson_score']].head())

                                              review watson_sentiment  \
0   Had not flown EasyJet for over a year because...         negative   
1     Given the basic fare cost with no hold bagg...         negative   
2   My EasyJet flights from Oporto to Ajaccio wer...         negative   
3     EasyJet are scammers. We were never informe...         negative   
4    I have been travelling for 5 years, between ...         negative   

   watson_score  
0     -0.850861  
1     -0.806221  
2     -0.384695  
3     -0.618626  
4     -0.724085  


In [7]:
# Applying IBM Watson NLU sentiment analysis: Ryanair
# Score every review once, then split the (label, score) pairs into two
# columns. Collecting into a list first (rather than unpacking zip(*...))
# also works for a frame with no rows, where the unpack would raise ValueError.
ryanair_results = data_2['review'].apply(watson_sentiment).tolist()
data_2['watson_sentiment'] = [label for label, _ in ryanair_results]
data_2['watson_score'] = [score for _, score in ryanair_results]

# Save the results to a new file
data_2.to_csv('ryanair_with_sentiment.csv', index=False, encoding='utf-8-sig')

print(data_2[['review', 'watson_sentiment', 'watson_score']].head())

                                              review watson_sentiment  \
0     I purchased Priority & 2 Cabin Bags at the ...         negative   
1    On 19th July, there was an IT glitch which i...         negative   
2     I wish I had never sought to save money try...         negative   
3    On 19.07.24 due to worldwide IT issues cause...         negative   
4   Black Friday travel, as all computer systems ...         negative   

   watson_score  
0     -0.882141  
1     -0.607847  
2     -0.811564  
3     -0.899610  
4     -0.441713  


In [8]:
# Applying IBM Watson NLU sentiment analysis: Wizz-air
# Score every review once, then split the (label, score) pairs into two
# columns. Collecting into a list first (rather than unpacking zip(*...))
# also works for a frame with no rows, where the unpack would raise ValueError.
wizzair_results = data_3['review'].apply(watson_sentiment).tolist()
data_3['watson_sentiment'] = [label for label, _ in wizzair_results]
data_3['watson_score'] = [score for _, score in wizzair_results]

# Save the results to a new file
data_3.to_csv('wizzair_with_sentiment.csv', index=False, encoding='utf-8-sig')

print(data_3[['review', 'watson_sentiment', 'watson_score']].head())

                                              review watson_sentiment  \
0    Flew with them 7 times so far, every single ...         negative   
1     They cancelled the flight and did not offer...         negative   
2   My dissatisfaction with the airport transfer ...         negative   
3    Wizz Air kept the charge for luggage I didn'...         negative   
4   I recently flew with Wizz Air and it was the ...         negative   

   watson_score  
0     -0.981492  
1     -0.823826  
2     -0.650168  
3     -0.634190  
4     -0.815420  


In [2]:
# Read Low-cost Airline Data
# Decode as UTF-8 first; if the bytes are not valid UTF-8, retry the same
# file as ISO-8859-1.
for csv_encoding in ('utf-8', 'ISO-8859-1'):
    try:
        data_lowcost = pd.read_csv('low_cost.csv', encoding=csv_encoding)
    except UnicodeDecodeError:
        continue
    break

data_lowcost.head()

Unnamed: 0,status,aircraft,travel_type,travel_class,route,date,seating_comfort,staff_service,food_quality,entertainment,...,value_for_money,recommended,overall_rating,review,watson_sentiment,watson_score,covid_19,company_age,log_company_size,log_net_profit
0,Not Verified,,Business,Economy Class,London Gatwick to Bordeaux,24-Jun,2.0,2.0,,,...,1.0,no,1,Had not flown EasyJet for over a year because...,negative,-0.850861,0,29,8.552367,5.783825
1,Not Verified,A320,Business,Economy Class,Bristol to Edinburgh,24-May,1.0,1.0,,,...,1.0,no,1,Given the basic fare cost with no hold bagg...,negative,-0.806221,0,29,8.552367,5.783825
2,? Trip Verified,A320NEO,Solo Leisure,Economy Class,Porto to Ajaccio via Geneva,24-May,3.0,4.0,,,...,5.0,yes,7,My EasyJet flights from Oporto to Ajaccio wer...,negative,-0.384695,0,29,8.552367,5.783825
3,Not Verified,A320,Couple Leisure,Economy Class,Gatwick to Tenerife,24-May,2.0,5.0,,2.0,...,2.0,no,2,? ?EasyJet are scammers. We were never informe...,negative,-0.618626,0,29,8.552367,5.783825
4,? Trip Verified,,Solo Leisure,Economy Class,Hurghada to Gatwick,24-May,1.0,1.0,1.0,1.0,...,1.0,no,3,"I have been travelling for 5 years, between ...",negative,-0.724085,0,29,8.552367,5.783825


In [3]:
# Linear regression analysis: Low-cost
# Regress the Watson sentiment score on the four service ratings.
# Controls present in the data but left out of this specification:
# covid_19+company_age+log_company_size+log_net_profit
model = smf.ols(
    formula='watson_score~seating_comfort+staff_service+ground_service+value_for_money',
    data=data_lowcost,
)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:           watson_score   R-squared:                       0.632
Model:                            OLS   Adj. R-squared:                  0.632
Method:                 Least Squares   F-statistic:                     1131.
Date:                Sun, 28 Jul 2024   Prob (F-statistic):               0.00
Time:                        22:50:03   Log-Likelihood:                -888.59
No. Observations:                2637   AIC:                             1787.
Df Residuals:                    2632   BIC:                             1817.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept          -1.1908      0.013    -

In [5]:
# Applying IBM Watson NLU sentiment analysis: Air France
# Score every review once, then split the (label, score) pairs into two
# columns. Collecting into a list first (rather than unpacking zip(*...))
# also works for a frame with no rows, where the unpack would raise ValueError.
airfrance_results = data_4['review'].apply(watson_sentiment).tolist()
data_4['watson_sentiment'] = [label for label, _ in airfrance_results]
data_4['watson_score'] = [score for _, score in airfrance_results]

# Save the results to a new file
data_4.to_csv('airfrance_with_sentiment.csv', index=False, encoding='utf-8-sig')

print(data_4[['review', 'watson_sentiment', 'watson_score']].head())

                                              review watson_sentiment  \
0    Disappointing Premium Economy onboard experi...         negative   
1   I am very disappointed with this airline and ...         negative   
2    They delay the first flight and the second f...         negative   
3    Boarding was delayed on the first leg, and o...         negative   
4    My family of 4 and I are long-time, loyal De...         negative   

   watson_score  
0     -0.679261  
1     -0.859648  
2     -0.882312  
3     -0.737017  
4     -0.752723  


In [6]:
# Applying IBM Watson NLU sentiment analysis: British Airways
# Score every review once, then split the (label, score) pairs into two
# columns. Collecting into a list first (rather than unpacking zip(*...))
# also works for a frame with no rows, where the unpack would raise ValueError.
british_results = data_5['review'].apply(watson_sentiment).tolist()
data_5['watson_sentiment'] = [label for label, _ in british_results]
data_5['watson_score'] = [score for _, score in british_results]

# Save the results to a new file
data_5.to_csv('british_with_sentiment.csv', index=False, encoding='utf-8-sig')

print(data_5[['review', 'watson_sentiment', 'watson_score']].head())

                                              review watson_sentiment  \
0   If you can’t fly First Class don’t fly Britis...         negative   
1     Singapore to Heathrow. Business class on an...         negative   
2     I reported my damaged/ruined suitcase 5 wee...         negative   
3     On March 1st, I flew from Berlin to São Pau...         negative   
4   The WORST customer experience! British Airway...         negative   

   watson_score  
0     -0.912821  
1     -0.659135  
2     -0.906865  
3     -0.782213  
4     -0.737737  


In [7]:
# Applying IBM Watson NLU sentiment analysis: Lufthansa
# Score every review once, then split the (label, score) pairs into two
# columns. Collecting into a list first (rather than unpacking zip(*...))
# also works for a frame with no rows, where the unpack would raise ValueError.
lufthansa_results = data_6['review'].apply(watson_sentiment).tolist()
data_6['watson_sentiment'] = [label for label, _ in lufthansa_results]
data_6['watson_score'] = [score for _, score in lufthansa_results]

# Save the results to a new file
data_6.to_csv('lufthansa_with_sentiment.csv', index=False, encoding='utf-8-sig')

print(data_6[['review', 'watson_sentiment', 'watson_score']].head())

                                              review watson_sentiment  \
0    I paid for emergency exit seats, Lufthansa c...         negative   
1    Flight was delayed by about 1 hour. An excus...         negative   
2   Flight cancelled due to a delay leading to th...         negative   
3   Our recent experience with Lufthansa has been...         negative   
4   I stopped flying Lufthansa 15 years ago and n...         negative   

   watson_score  
0     -0.464124  
1     -0.660650  
2     -0.891089  
3     -0.869067  
4     -0.782492  


In [4]:
# Read Traditional Airline Data
# Decode as UTF-8 first; if the bytes are not valid UTF-8, retry the same
# file as ISO-8859-1.
for csv_encoding in ('utf-8', 'ISO-8859-1'):
    try:
        data_traditional = pd.read_csv('traditional.csv', encoding=csv_encoding)
    except UnicodeDecodeError:
        continue
    break

data_traditional.head()

Unnamed: 0,status,aircraft,travel_type,travel_class,route,date,seating_comfort,staff_service,food_quality,entertainment,...,value_for_money,recommended,overall_rating,review,watson_sentiment,watson_score,covid_19,company_age,log_company_size,log_net_profit
0,? Trip Verified,Boeing 777,Family Leisure,Premium Economy,Paris to Toronto,24-Jul,2.0,1.0,1.0,1.0,...,3,no,5,Disappointing Premium Economy onboard experi...,negative,-0.679261,0,91,9.826499,6.990257
1,? Trip Verified,,Solo Leisure,Economy Class,New York to Paris,24-Jan,1.0,1.0,,,...,1,no,1,I am very disappointed with this airline and ...,negative,-0.859648,0,91,9.826499,6.990257
2,? Trip Verified,,Solo Leisure,Economy Class,Hamburg to Singapore via Paris,24-Jul,1.0,1.0,,,...,1,no,1,They delay the first flight and the second f...,negative,-0.882312,0,91,9.826499,6.990257
3,? Trip Verified,A321 / Boeing 777-300ER,Couple Leisure,Economy Class,Lisbon to Singapore via Paris,24-Jul,4.0,2.0,2.0,3.0,...,3,no,5,"Boarding was delayed on the first leg, and o...",negative,-0.737017,0,91,9.826499,6.990257
4,? Trip Verified,,Family Leisure,Economy Class,Los Angeles to Barcelona via Paris,24-Jul,3.0,2.0,3.0,2.0,...,1,no,3,"My family of 4 and I are long-time, loyal De...",negative,-0.752723,0,91,9.826499,6.990257


In [6]:
# Linear regression analysis: traditional
# Regress the Watson sentiment score on the four service ratings plus the
# controls covid_19+company_age+log_company_size+log_net_profit.
model_2 = smf.ols(
    formula='watson_score~seating_comfort+staff_service+ground_service+value_for_money+covid_19+company_age+log_company_size+log_net_profit',
    data=data_traditional,
)
results = model_2.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:           watson_score   R-squared:                       0.671
Model:                            OLS   Adj. R-squared:                  0.670
Method:                 Least Squares   F-statistic:                     702.8
Date:                Sun, 28 Jul 2024   Prob (F-statistic):               0.00
Time:                        22:50:56   Log-Likelihood:                -1224.5
No. Observations:                2761   AIC:                             2467.
Df Residuals:                    2752   BIC:                             2520.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           -0.7903      0.163  