## Web Scraping

In [None]:
# necessary libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import re
from time import sleep

In [None]:
# dataframe to store the data
columns = ['Title',
 'Author',
 'Country',
 'Date',
 'Verified',
 'Comment',
 'Type Of Traveller',
 'Seat Type',
 'Route',
 'Date Flown',
 'Seat Comfort',
 'Cabin Staff Service',
 'Food & Beverages',
 'Inflight Entertainment',
 'Ground Service',
 'Wifi & Connectivity',
 'Value For Money',
 'Recommended']

df = pd.DataFrame(columns=columns)
df

In [None]:
# function for getting data from one post
def get_comment(soup):
    data = {} # storing in one dictionary
    # Title of the Comment
    data["Title"] = soup.find('h2', class_='text_header').text
    # The Name of the commenter 
    data["Author"] = soup.find('span', attrs={'itemprop': "name"}).text
    
    # The Country of the Commenter
    # it's written in one element (need to scrape it without getting the text of child elements)
    # also [1:-1] -> because country was written in parathesis
    country = soup.find("h3", class_='userStatusWrapper')
    data["Country"] = "".join(country.find_all(string=True, recursive=False)).strip()[1:-1]
    
    # Date of the comment (taken from datetime attribute of the time element)
    data["Date"] = soup.find('time')['datetime']

    # extracting the comment
    text_content = soup.find('div', class_='text_content')
    # checking whether the comment is verified or not
    try:
        if text_content.find('strong').text.strip() == 'Trip Verified':
            data["Verified"] = True
        else:
            data["Verified"] = False
    except:
        data['Verified'] = 'Not Specified'
    # the comment itself same as country (without getting the text of the child element)
    data["Comment"] = "".join(text_content.find_all(string=True, recursive=False))
    data["Comment"] = data["Comment"][data['Comment'].find("|")+1:].strip()
    
    # getting the review stats from the table
    # adding to the dictionary one by one
    review_stats = soup.find('div', class_='review-stats')
    review_stats = review_stats.find_all('tr')
    for i in review_stats:
        try:
            data[i.find('td', class_='review-rating-header').text] = i.find('td', class_='review-value').text
        except: 
            try:
                star_num = 0
                tds = i.find('td', class_='review-rating-stars')
                tds = tds.find_all('span')
                for td in tds:
                    if 'fill' in td['class']:
                        star_num +=1
                data[i.find('td', class_='review-rating-header').text] = star_num
            except:
                data[i.find('td', class_='review-rating-header').text] = None
    return data

In [None]:
# accessing the given url about british airways
driver = webdriver.Chrome()
page_num = 355

while(True):
    url = "https://www.airlinequality.com/airline-reviews/british-airways/page/{}/".format(page_num)
    driver.get(url)
    sleep(2)
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html')

    num_of_reviews = int(soup.find('div', class_='pagination-total').text.split(" ")[-2])
    comments = soup.find_all('div', class_='body')
    for com in comments:
        data = get_comment(com)
        df.loc[len(df.index)] = data
    if num_of_reviews <= len(df.index):
        break
    page_num+=1
df

In [None]:
df.drop_duplicates(inplace=True)

## Roberta Sentiment Analysis

In [None]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax
# Preprocess text (username and link placeholders)
def preprocess(text):
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
# PT
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
#model.save_pretrained(MODEL)
def sentiment(text):
    text = preprocess(text)
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    # # TF
    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
    # model.save_pretrained(MODEL)
    # text = "Covid cases are increasing fast!"
    # encoded_input = tokenizer(text, return_tensors='tf')
    # output = model(encoded_input)
    # scores = output[0][0].numpy()
    # scores = softmax(scores)
    # Print labels and scores
    ranking = np.argsort(scores)
    ranking = ranking[::-1]
    result = {}
    for i in range(scores.shape[0]):
        l = config.id2label[ranking[i]]
        s = scores[ranking[i]]
        result[l] = s
    return result

In [None]:
df['positive'] = ""
df['neutral'] = ""
df['negative'] = ""

for index, row in df.iterrows():
    try:
        results = sentiment(row['Comment'])
        df.at[index, 'positive'] = results['positive']
        df.at[index, 'neutral'] = results['neutral']
        df.at[index, 'negative'] = results['negative']
    except:
        print("-------------\n"+ index +" -> too long\n ------------")
        continue
    print(index)

In [None]:
df.to_excel("british_airways_sentiment.xlsx")

# Topic Modeling (BERTopic)

In [7]:
# Data processing
import pandas as pd
import numpy as np

# Text preprocessiong
import nltk
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('wordnet')
wn = nltk.WordNetLemmatizer()

# Topic model
from bertopic import BERTopic

# Dimension reduction
from umap import UMAP

[nltk_data] Downloading package stopwords to /Users/aydan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/aydan/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/aydan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


In [8]:
df = pd.read_excel("british_airways_sentiment.xlsx")
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Title,Author,Country,Date,Verified,Comment,Type Of Traveller,Seat Type,...,Cabin Staff Service,Food & Beverages,Inflight Entertainment,Ground Service,Wifi & Connectivity,Value For Money,Recommended,positive,neutral,negative
0,0,0,"""Total garbage""",Cosmin Stefanescu,Romania,2023-06-16,False,"I flew with numerous airlines, but I gotta adm...",Solo Leisure,Economy Class,...,4.0,3.0,3.0,1.0,1.0,1,no,0.007509,0.064647,0.927844
1,1,1,"""arrived at the airport only 1 hour before""",Emmeline Reichert,United States,2023-06-13,True,We were traveling as a family (5 people). Beca...,Family Leisure,Economy Class,...,5.0,4.0,,1.0,,3,no,0.043761,0.566126,0.390113
2,2,2,"""so callous and uncaring""",Jamie Gooding,Australia,2023-06-12,True,Flight at 8.40am from DUB to LCY cancelled 115...,Solo Leisure,Economy Class,...,1.0,,,1.0,,1,no,0.008636,0.141682,0.849682
3,3,3,"""uncomfy old planes""",Michael Hayward,United Kingdom,2023-06-11,True,Terrible. I have traveled twice with them this...,Business,Business Class,...,1.0,1.0,1.0,1.0,1.0,1,no,0.005496,0.048446,0.946058
4,4,4,"""never fly British Airway ever again""",George W Edmonds,United States,2023-06-11,True,The customer service is ugly. Tried calling tw...,Solo Leisure,Economy Class,...,,,,1.0,,1,no,0.005227,0.056151,0.938622
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3452,3452,3573,British Airways customer review,Colin Pay,United Kingdom,2014-05-11,Not Specified,Rating : 10/10 Cabin Flown Economy Value for M...,,Economy Class,...,5.0,5.0,1.0,,,5,yes,0.551082,0.439142,0.009776
3453,3453,3574,British Airways customer review,Jeff Suykerbuyk,Belgium,2014-05-11,Not Specified,08/5/2014 JFK-LHR-BRU. After the perfect outbo...,,Business Class,...,5.0,5.0,4.0,,,5,yes,0.234582,0.507833,0.257585
3454,3454,3575,British Airways customer review,Andrew Allen,United Kingdom,2014-05-11,Not Specified,Belfast-Heathrow-Frankfurt return. Very impres...,,Business Class,...,5.0,5.0,2.0,,,5,yes,0.954508,0.039826,0.005665
3455,3455,3576,British Airways customer review,Jeff Suykerbuyk,Belgium,2014-05-11,Not Specified,2/5/2014 BRU-LHR-JFK. Short flight in business...,,Business Class,...,5.0,4.0,4.0,,,5,yes,0.914147,0.081130,0.004723


In [9]:
# Remove stopwords
stopwords = nltk.corpus.stopwords.words('english')
print(f'There are {len(stopwords)} default stopwords. They are {stopwords}')

There are 179 default stopwords. They are ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'no

In [10]:
# Remove stopwords
df['review_without_stopwords'] = df['Comment'].apply(lambda x: ' '.join([w for w in x.split() if w.lower() not in stopwords]))

# Lemmatization
df['review_lemmatized'] = df['review_without_stopwords'].apply(lambda x: ' '.join([wn.lemmatize(w) for w in x.split() if w not in stopwords]))

df.reset_index(inplace=True)
# Take a look at the data
df.head()

Unnamed: 0.2,index,Unnamed: 0.1,Unnamed: 0,Title,Author,Country,Date,Verified,Comment,Type Of Traveller,...,Inflight Entertainment,Ground Service,Wifi & Connectivity,Value For Money,Recommended,positive,neutral,negative,review_without_stopwords,review_lemmatized
0,0,0,0,"""Total garbage""",Cosmin Stefanescu,Romania,2023-06-16,False,"I flew with numerous airlines, but I gotta adm...",Solo Leisure,...,3.0,1.0,1.0,1,no,0.007509,0.064647,0.927844,"flew numerous airlines, gotta admit, British A...","flew numerous airlines, gotta admit, British A..."
1,1,1,1,"""arrived at the airport only 1 hour before""",Emmeline Reichert,United States,2023-06-13,True,We were traveling as a family (5 people). Beca...,Family Leisure,...,,1.0,,3,no,0.043761,0.566126,0.390113,"traveling family (5 people). accident airport,...","traveling family (5 people). accident airport,..."
2,2,2,2,"""so callous and uncaring""",Jamie Gooding,Australia,2023-06-12,True,Flight at 8.40am from DUB to LCY cancelled 115...,Solo Leisure,...,,1.0,,1,no,0.008636,0.141682,0.849682,Flight 8.40am DUB LCY cancelled 1155pm night b...,Flight 8.40am DUB LCY cancelled 1155pm night b...
3,3,3,3,"""uncomfy old planes""",Michael Hayward,United Kingdom,2023-06-11,True,Terrible. I have traveled twice with them this...,Business,...,1.0,1.0,1.0,1,no,0.005496,0.048446,0.946058,Terrible. traveled twice year via business cla...,Terrible. traveled twice year via business cla...
4,4,4,4,"""never fly British Airway ever again""",George W Edmonds,United States,2023-06-11,True,The customer service is ugly. Tried calling tw...,Solo Leisure,...,,1.0,,1,no,0.005227,0.056151,0.938622,customer service ugly. Tried calling two weeks...,customer service ugly. Tried calling two week ...


In [13]:
# Initiate UMAP
umap_model = UMAP(n_components=5, 
                  min_dist=0.0, 
                  metric='cosine', 
                  random_state=100)

# Initiate BERTopic
topic_model = BERTopic(umap_model=umap_model, language="english", calculate_probabilities=True, nr_topics=15)

# Run BERTopic model
topics, probabilities = topic_model.fit_transform(df['review_lemmatized'])

In [14]:
# Get the list of topics
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1846,-1_flight_seat_ba_service,"[flight, seat, ba, service, food, crew, cabin,...",[Seoul Incheon London Heathrow. first flight B...
1,0,514,0_class_seat_flight_ba,"[class, seat, flight, ba, service, business, f...",[London Hong Kong premium economy. flying BA m...
2,1,439,1_flight_ba_customer_told,"[flight, ba, customer, told, british, day, air...",[itinerary supposed Las Vegas-Chicago-London-V...
3,2,195,2_good_flight_crew_excellent,"[good, flight, crew, excellent, seat, food, se...",[BA got everything right. Allowed evening chec...
4,3,164,3_london_flight_heathrow_british,"[london, flight, heathrow, british, airways, s...",[New York JFK London Gatwick British Airways. ...
5,4,130,4_gatwick_flight_seat_ba,"[gatwick, flight, seat, ba, service, lounge, c...",[Gatwick Lima return Business Class. choice se...
6,5,45,5_belfast_flight_lisbon_heathrow,"[belfast, flight, lisbon, heathrow, london, lu...",[Flew London Heathrow Belfast George Best Airp...
7,6,29,6_athens_flight_london_food,"[athens, flight, london, food, heathrow, hour,...",[London Athens. British Airways glorified budg...
8,7,19,7_malaga_gatwick_food_ba,"[malaga, gatwick, food, ba, cabin, trolley, fl...",[London Malaga. terminally ill airline packed ...
9,8,18,8_doha_qatar_class_business,"[doha, qatar, class, business, crew, bahrain, ...",[Bahrain Boston via London. First time flying ...


In [15]:
# Get the topic predictions
topic_prediction = topic_model.topics_[:]

# Save the predictions in the dataframe
df['topic_prediction'] = topic_prediction

# Take a look at the data
df.head()

Unnamed: 0.2,index,Unnamed: 0.1,Unnamed: 0,Title,Author,Country,Date,Verified,Comment,Type Of Traveller,...,Ground Service,Wifi & Connectivity,Value For Money,Recommended,positive,neutral,negative,review_without_stopwords,review_lemmatized,topic_prediction
0,0,0,0,"""Total garbage""",Cosmin Stefanescu,Romania,2023-06-16,False,"I flew with numerous airlines, but I gotta adm...",Solo Leisure,...,1.0,1.0,1,no,0.007509,0.064647,0.927844,"flew numerous airlines, gotta admit, British A...","flew numerous airlines, gotta admit, British A...",1
1,1,1,1,"""arrived at the airport only 1 hour before""",Emmeline Reichert,United States,2023-06-13,True,We were traveling as a family (5 people). Beca...,Family Leisure,...,1.0,,3,no,0.043761,0.566126,0.390113,"traveling family (5 people). accident airport,...","traveling family (5 people). accident airport,...",-1
2,2,2,2,"""so callous and uncaring""",Jamie Gooding,Australia,2023-06-12,True,Flight at 8.40am from DUB to LCY cancelled 115...,Solo Leisure,...,1.0,,1,no,0.008636,0.141682,0.849682,Flight 8.40am DUB LCY cancelled 1155pm night b...,Flight 8.40am DUB LCY cancelled 1155pm night b...,1
3,3,3,3,"""uncomfy old planes""",Michael Hayward,United Kingdom,2023-06-11,True,Terrible. I have traveled twice with them this...,Business,...,1.0,1.0,1,no,0.005496,0.048446,0.946058,Terrible. traveled twice year via business cla...,Terrible. traveled twice year via business cla...,-1
4,4,4,4,"""never fly British Airway ever again""",George W Edmonds,United States,2023-06-11,True,The customer service is ugly. Tried calling tw...,Solo Leisure,...,1.0,,1,no,0.005227,0.056151,0.938622,customer service ugly. Tried calling two weeks...,customer service ugly. Tried calling two week ...,1


In [16]:
df_topic = topic_model.get_topic_info()
for index, row in df_topic.iterrows():
    df_topic.at[index, 'NameClear'] = "-".join(row['Name'].split('_')[1:])
df_topic

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs,NameClear
0,-1,1846,-1_flight_seat_ba_service,"[flight, seat, ba, service, food, crew, cabin,...",[Seoul Incheon London Heathrow. first flight B...,flight-seat-ba-service
1,0,514,0_class_seat_flight_ba,"[class, seat, flight, ba, service, business, f...",[London Hong Kong premium economy. flying BA m...,class-seat-flight-ba
2,1,439,1_flight_ba_customer_told,"[flight, ba, customer, told, british, day, air...",[itinerary supposed Las Vegas-Chicago-London-V...,flight-ba-customer-told
3,2,195,2_good_flight_crew_excellent,"[good, flight, crew, excellent, seat, food, se...",[BA got everything right. Allowed evening chec...,good-flight-crew-excellent
4,3,164,3_london_flight_heathrow_british,"[london, flight, heathrow, british, airways, s...",[New York JFK London Gatwick British Airways. ...,london-flight-heathrow-british
5,4,130,4_gatwick_flight_seat_ba,"[gatwick, flight, seat, ba, service, lounge, c...",[Gatwick Lima return Business Class. choice se...,gatwick-flight-seat-ba
6,5,45,5_belfast_flight_lisbon_heathrow,"[belfast, flight, lisbon, heathrow, london, lu...",[Flew London Heathrow Belfast George Best Airp...,belfast-flight-lisbon-heathrow
7,6,29,6_athens_flight_london_food,"[athens, flight, london, food, heathrow, hour,...",[London Athens. British Airways glorified budg...,athens-flight-london-food
8,7,19,7_malaga_gatwick_food_ba,"[malaga, gatwick, food, ba, cabin, trolley, fl...",[London Malaga. terminally ill airline packed ...,malaga-gatwick-food-ba
9,8,18,8_doha_qatar_class_business,"[doha, qatar, class, business, crew, bahrain, ...",[Bahrain Boston via London. First time flying ...,doha-qatar-class-business


In [17]:
merged = df.merge(df_topic, how='left', left_on='topic_prediction', right_on="Topic")
merged

Unnamed: 0.2,index,Unnamed: 0.1,Unnamed: 0,Title,Author,Country,Date,Verified,Comment,Type Of Traveller,...,negative,review_without_stopwords,review_lemmatized,topic_prediction,Topic,Count,Name,Representation,Representative_Docs,NameClear
0,0,0,0,"""Total garbage""",Cosmin Stefanescu,Romania,2023-06-16,False,"I flew with numerous airlines, but I gotta adm...",Solo Leisure,...,0.927844,"flew numerous airlines, gotta admit, British A...","flew numerous airlines, gotta admit, British A...",1,1,439,1_flight_ba_customer_told,"[flight, ba, customer, told, british, day, air...",[itinerary supposed Las Vegas-Chicago-London-V...,flight-ba-customer-told
1,1,1,1,"""arrived at the airport only 1 hour before""",Emmeline Reichert,United States,2023-06-13,True,We were traveling as a family (5 people). Beca...,Family Leisure,...,0.390113,"traveling family (5 people). accident airport,...","traveling family (5 people). accident airport,...",-1,-1,1846,-1_flight_seat_ba_service,"[flight, seat, ba, service, food, crew, cabin,...",[Seoul Incheon London Heathrow. first flight B...,flight-seat-ba-service
2,2,2,2,"""so callous and uncaring""",Jamie Gooding,Australia,2023-06-12,True,Flight at 8.40am from DUB to LCY cancelled 115...,Solo Leisure,...,0.849682,Flight 8.40am DUB LCY cancelled 1155pm night b...,Flight 8.40am DUB LCY cancelled 1155pm night b...,1,1,439,1_flight_ba_customer_told,"[flight, ba, customer, told, british, day, air...",[itinerary supposed Las Vegas-Chicago-London-V...,flight-ba-customer-told
3,3,3,3,"""uncomfy old planes""",Michael Hayward,United Kingdom,2023-06-11,True,Terrible. I have traveled twice with them this...,Business,...,0.946058,Terrible. traveled twice year via business cla...,Terrible. traveled twice year via business cla...,-1,-1,1846,-1_flight_seat_ba_service,"[flight, seat, ba, service, food, crew, cabin,...",[Seoul Incheon London Heathrow. first flight B...,flight-seat-ba-service
4,4,4,4,"""never fly British Airway ever again""",George W Edmonds,United States,2023-06-11,True,The customer service is ugly. Tried calling tw...,Solo Leisure,...,0.938622,customer service ugly. Tried calling two weeks...,customer service ugly. Tried calling two week ...,1,1,439,1_flight_ba_customer_told,"[flight, ba, customer, told, british, day, air...",[itinerary supposed Las Vegas-Chicago-London-V...,flight-ba-customer-told
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3452,3452,3452,3573,British Airways customer review,Colin Pay,United Kingdom,2014-05-11,Not Specified,Rating : 10/10 Cabin Flown Economy Value for M...,,...,0.009776,Rating : 10/10 Cabin Flown Economy Value Money...,Rating : 10/10 Cabin Flown Economy Value Money...,-1,-1,1846,-1_flight_seat_ba_service,"[flight, seat, ba, service, food, crew, cabin,...",[Seoul Incheon London Heathrow. first flight B...,flight-seat-ba-service
3453,3453,3453,3574,British Airways customer review,Jeff Suykerbuyk,Belgium,2014-05-11,Not Specified,08/5/2014 JFK-LHR-BRU. After the perfect outbo...,,...,0.257585,08/5/2014 JFK-LHR-BRU. perfect outbound flight...,08/5/2014 JFK-LHR-BRU. perfect outbound flight...,-1,-1,1846,-1_flight_seat_ba_service,"[flight, seat, ba, service, food, crew, cabin,...",[Seoul Incheon London Heathrow. first flight B...,flight-seat-ba-service
3454,3454,3454,3575,British Airways customer review,Andrew Allen,United Kingdom,2014-05-11,Not Specified,Belfast-Heathrow-Frankfurt return. Very impres...,,...,0.005665,Belfast-Heathrow-Frankfurt return. impressed s...,Belfast-Heathrow-Frankfurt return. impressed s...,5,5,45,5_belfast_flight_lisbon_heathrow,"[belfast, flight, lisbon, heathrow, london, lu...",[Flew London Heathrow Belfast George Best Airp...,belfast-flight-lisbon-heathrow
3455,3455,3455,3576,British Airways customer review,Jeff Suykerbuyk,Belgium,2014-05-11,Not Specified,2/5/2014 BRU-LHR-JFK. Short flight in business...,,...,0.004723,2/5/2014 BRU-LHR-JFK. Short flight business Br...,2/5/2014 BRU-LHR-JFK. Short flight business Br...,-1,-1,1846,-1_flight_seat_ba_service,"[flight, seat, ba, service, food, crew, cabin,...",[Seoul Incheon London Heathrow. first flight B...,flight-seat-ba-service


In [19]:
merged.to_excel("british_airways.xlsx")

Editing dataset

In [1]:
import pandas as pd
df = pd.read_excel("british_airways.xlsx")
df

Unnamed: 0.1,Unnamed: 0,Title,Author,Country,Date,Verified,Comment,Type Of Traveller,Seat Type,Route,...,neutral,negative,Name,Representation,Representative_Docs,NameClear,Sentiment Result,From The City,To The City,Via The City
0,0,"""Total garbage""",Cosmin Stefanescu,Romania,2023-06-16,False,"I flew with numerous airlines, but I gotta adm...",Solo Leisure,Economy Class,Bucharest to Dallas via London,...,0.064647,0.927844,1_flight_ba_customer_told,"['flight', 'ba', 'customer', 'told', 'british'...",['itinerary supposed Las Vegas-Chicago-London-...,flight-ba-customer-told,Negative,Bucharest,Dallas,London
1,1,"""arrived at the airport only 1 hour before""",Emmeline Reichert,United States,2023-06-13,True,We were traveling as a family (5 people). Beca...,Family Leisure,Economy Class,Gatwick to Venice,...,0.566126,0.390113,-1_flight_seat_ba_service,"['flight', 'seat', 'ba', 'service', 'food', 'c...",['Seoul Incheon London Heathrow. first flight ...,flight-seat-ba-service,Neutral,Gatwick,Venice,
2,2,"""so callous and uncaring""",Jamie Gooding,Australia,2023-06-12,True,Flight at 8.40am from DUB to LCY cancelled 115...,Solo Leisure,Economy Class,Dublin to London City,...,0.141682,0.849682,1_flight_ba_customer_told,"['flight', 'ba', 'customer', 'told', 'british'...",['itinerary supposed Las Vegas-Chicago-London-...,flight-ba-customer-told,Negative,Dublin,London City,
3,3,"""uncomfy old planes""",Michael Hayward,United Kingdom,2023-06-11,True,Terrible. I have traveled twice with them this...,Business,Business Class,Tokyo to Manchester via Heathrow,...,0.048446,0.946058,-1_flight_seat_ba_service,"['flight', 'seat', 'ba', 'service', 'food', 'c...",['Seoul Incheon London Heathrow. first flight ...,flight-seat-ba-service,Negative,Tokyo,Manchester,Heathrow
4,4,"""never fly British Airway ever again""",George W Edmonds,United States,2023-06-11,True,The customer service is ugly. Tried calling tw...,Solo Leisure,Economy Class,San Francisco to London,...,0.056151,0.938622,1_flight_ba_customer_told,"['flight', 'ba', 'customer', 'told', 'british'...",['itinerary supposed Las Vegas-Chicago-London-...,flight-ba-customer-told,Negative,San Francisco,London,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3452,3452,British Airways customer review,Colin Pay,United Kingdom,2014-05-11,Not Specified,Rating : 10/10 Cabin Flown Economy Value for M...,,Economy Class,,...,0.439142,0.009776,-1_flight_seat_ba_service,"['flight', 'seat', 'ba', 'service', 'food', 'c...",['Seoul Incheon London Heathrow. first flight ...,flight-seat-ba-service,Positive,,,
3453,3453,British Airways customer review,Jeff Suykerbuyk,Belgium,2014-05-11,Not Specified,08/5/2014 JFK-LHR-BRU. After the perfect outbo...,,Business Class,,...,0.507833,0.257585,-1_flight_seat_ba_service,"['flight', 'seat', 'ba', 'service', 'food', 'c...",['Seoul Incheon London Heathrow. first flight ...,flight-seat-ba-service,Neutral,,,
3454,3454,British Airways customer review,Andrew Allen,United Kingdom,2014-05-11,Not Specified,Belfast-Heathrow-Frankfurt return. Very impres...,,Business Class,,...,0.039826,0.005665,5_belfast_flight_lisbon_heathrow,"['belfast', 'flight', 'lisbon', 'heathrow', 'l...",['Flew London Heathrow Belfast George Best Air...,belfast-flight-lisbon-heathrow,Positive,,,
3455,3455,British Airways customer review,Jeff Suykerbuyk,Belgium,2014-05-11,Not Specified,2/5/2014 BRU-LHR-JFK. Short flight in business...,,Business Class,,...,0.081130,0.004723,-1_flight_seat_ba_service,"['flight', 'seat', 'ba', 'service', 'food', 'c...",['Seoul Incheon London Heathrow. first flight ...,flight-seat-ba-service,Positive,,,


In [21]:
df['Sentiment Result'] = ""
for index, row in df.iterrows():
    if row['positive'] >= row['neutral']:
        if row['positive'] >= row['negative']:
            df.at[index, 'Sentiment Result'] = 'Positive'
        else:
            df.at[index, 'Sentiment Result'] = 'Negative'
    else:
        if row['neutral'] >= row['negative']:
            df.at[index, 'Sentiment Result'] = 'Neutral'
        else:
            df.at[index, 'Sentiment Result'] = 'Negative'    

In [52]:
df.head()

Unnamed: 0,Title,Author,Country,Date,Verified,Comment,Type Of Traveller,Seat Type,Route,Date Flown,...,neutral,negative,Name,Representation,Representative_Docs,NameClear,Sentiment Result,From The City,To The City,Via The City
0,"""Total garbage""",Cosmin Stefanescu,Romania,2023-06-16,False,"I flew with numerous airlines, but I gotta adm...",Solo Leisure,Economy Class,Bucharest to Dallas via London,June 2023,...,0.064647,0.927844,1_flight_ba_customer_told,"['flight', 'ba', 'customer', 'told', 'british'...",['itinerary supposed Las Vegas-Chicago-London-...,flight-ba-customer-told,Negative,Bucharest,Dallas,London
1,"""arrived at the airport only 1 hour before""",Emmeline Reichert,United States,2023-06-13,True,We were traveling as a family (5 people). Beca...,Family Leisure,Economy Class,Gatwick to Venice,June 2023,...,0.566126,0.390113,-1_flight_seat_ba_service,"['flight', 'seat', 'ba', 'service', 'food', 'c...",['Seoul Incheon London Heathrow. first flight ...,flight-seat-ba-service,Neutral,Gatwick,Venice,
2,"""so callous and uncaring""",Jamie Gooding,Australia,2023-06-12,True,Flight at 8.40am from DUB to LCY cancelled 115...,Solo Leisure,Economy Class,Dublin to London City,June 2023,...,0.141682,0.849682,1_flight_ba_customer_told,"['flight', 'ba', 'customer', 'told', 'british'...",['itinerary supposed Las Vegas-Chicago-London-...,flight-ba-customer-told,Negative,Dublin,London City,
3,"""uncomfy old planes""",Michael Hayward,United Kingdom,2023-06-11,True,Terrible. I have traveled twice with them this...,Business,Business Class,Tokyo to Manchester via Heathrow,June 2023,...,0.048446,0.946058,-1_flight_seat_ba_service,"['flight', 'seat', 'ba', 'service', 'food', 'c...",['Seoul Incheon London Heathrow. first flight ...,flight-seat-ba-service,Negative,Tokyo,Manchester,Heathrow
4,"""never fly British Airway ever again""",George W Edmonds,United States,2023-06-11,True,The customer service is ugly. Tried calling tw...,Solo Leisure,Economy Class,San Francisco to London,June 2023,...,0.056151,0.938622,1_flight_ba_customer_told,"['flight', 'ba', 'customer', 'told', 'british'...",['itinerary supposed Las Vegas-Chicago-London-...,flight-ba-customer-told,Negative,San Francisco,London,


In [53]:
df.to_excel("british_airways.xlsx")

In [51]:
df['From The City'] = ""
df['To The City'] = ""
df['Via The City'] = ""

for index, row in df.iterrows():
    if row['Route'] is None or type(row['Route']) == float:
        continue
    route = row['Route'].split(" ")
    try:
        df.at[index, 'From The City'] = " ".join(route[:route.index('to')])
    except:
        continue
    try:
        df.at[index, 'To The City'] = " ".join(route[route.index('to')+1:route.index('via')])
        df.at[index, 'Via The City'] = " ".join(route[route.index('via')+1:])
    except:
        df.at[index, 'To The City'] = " ".join(route[route.index('to')+1:])