In [1]:
# Import libraries

from pathlib import Path

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Scrape one page of Singapore Airlines reviews from Skytrax (newest first,
# page size 100 per the query string) and save them as a tidy CSV.

# Skytrax URL
url = 'https://www.airlinequality.com/airline-reviews/singapore-airlines/?sortby=post_date%3ADesc&pagesize=100'

# Request URL. Bound the wait so a stalled connection cannot hang the notebook,
# and fail fast on an HTTP error instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Create Soup
soup = BeautifulSoup(response.content, 'lxml')


def _value_cell_text(review, header_class):
    """Return the text of the cell that follows a given header cell in one review.

    Parameters
    ----------
    review : bs4.element.Tag
        The ``div.review-stats`` element of a single review.
    header_class : str
        Exact ``class`` attribute string of the header ``td`` to look for.

    Returns
    -------
    str or float
        Text of the header's next sibling tag, or ``np.nan`` when the review
        has no such header (or the header has no following sibling).
    """
    # Search inside this review only: indexing a page-wide list by position
    # shifts every later value as soon as one review lacks the row.
    header = review.find('td', {'class': header_class})
    if header is None:
        return np.nan
    value_cell = header.find_next_sibling()
    return value_cell.text if value_cell is not None else np.nan


def _star_ratings(review):
    """Collect the star-rated categories of one review as ``{category: score}``.

    Each table row is inspected on its own, so a category name is always
    paired with the stars found in the same row. Rows without a star cell, or
    whose star cell has no element with class ``fill``, are skipped and end
    up as missing values in the final DataFrame.
    """
    scores = {}
    for row in review.find_all('tr'):
        header = row.find('td', class_='review-rating-header')
        stars = row.find('td', class_='stars')
        if header is None or stars is None:
            continue
        filled = stars.find_all(class_='fill')
        if filled:
            # The text of the last filled star is used as the score
            # (presumably each star carries its ordinal - confirm against the page).
            scores[header.text] = filled[-1].text
    return scores


# Page-wide element lists, queried once instead of once per review.
# NOTE(review): these are aligned with the reviews purely by position. The
# offsets below imply that the first itemprop="name" element is not a review
# author and that each review carries two itemprop="datePublished" elements -
# confirm against the live page if the layout changes.
review_blocks = soup.find_all('div', {'class': "review-stats"})
author_tags = soup.find_all(itemprop='name')[1:]
date_tags = soup.find_all(itemprop='datePublished')[::2]
rating_tags = soup.find_all('div', {'itemprop': 'reviewRating'})
text_tags = soup.find_all('div', class_='text_content')

# Build one dictionary per review
final_list = []

for idx, review in enumerate(review_blocks):

    # Values that are in star fill
    review_dict = _star_ratings(review)

    # Author, Date, Overall_rating, Text_content, Recommended
    review_dict['Author'] = author_tags[idx].text
    review_dict['Date'] = date_tags[idx].attrs['content']
    review_dict['Overall_rating'] = rating_tags[idx].find(itemprop="ratingValue").text
    review_dict['text_content'] = text_tags[idx].text
    # The last cell of the last table row holds the recommendation
    review_dict['Recommended'] = review.find_all('tr')[-1].find_all('td')[-1].text

    # Type of Traveller and Seat type (missing rows become NaN)
    review_dict['traveller_type'] = _value_cell_text(review, 'review-rating-header type_of_traveller')
    review_dict['seat_type'] = _value_cell_text(review, 'review-rating-header cabin_flown')

    # Append dictionary into a list
    final_list.append(review_dict)

if not final_list:
    raise ValueError('No reviews were found on the page; the request may have been blocked or the layout changed.')

# Scraped column name -> output column name, in the output column order
column_names = {
    'airline': 'airline_name',
    'Author': 'author',
    'Date': 'date',
    'text_content': 'content',
    'traveller_type': 'type_traveller',
    'seat_type': 'cabin_flown',
    'Overall_rating': 'overall_rating',
    'Seat Comfort': 'seat_comfort_rating',
    'Cabin Staff Service': 'cabin_staff_rating',
    'Food & Beverages': 'food_beverages_rating',
    'Inflight Entertainment': 'inflight_entertainment_rating',
    'Ground Service': 'ground_service_rating',
    'Value For Money': 'value_money_rating',
    'Recommended': 'recommended',
}

# Create DataFrame, rearrange and rename. reindex (rather than df[[...]])
# keeps a rating category as an all-NaN column when no review on the page
# has it, instead of raising KeyError.
df = (
    pd.DataFrame(final_list)
    .assign(airline='singapore airlines')
    .reindex(columns=list(column_names))
    .rename(columns=column_names)
)

# Save to CSV, creating the output folder on a fresh checkout
output_path = Path('Data/singapore_airlines.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

In [3]:
# Sanity check: display the first rows of the scraped DataFrame
df.head()

Unnamed: 0,airline_name,author,date,content,type_traveller,cabin_flown,overall_rating,seat_comfort_rating,cabin_staff_rating,food_beverages_rating,inflight_entertainment_rating,ground_service_rating,value_money_rating,recommended
0,singapore airlines,Bagus Ferriyanto,2020-06-11,✅ Trip Verified | Osaka Kansai to Singapore. ...,Solo Leisure,Economy Class,9,5.0,4.0,5.0,5.0,5.0,5,yes
1,singapore airlines,Bagus Ferriyanto,2020-06-10,✅ Trip Verified | Bali Denpasar to Tokyo Nari...,Solo Leisure,Economy Class,8,5.0,4.0,4.0,5.0,5.0,5,yes
2,singapore airlines,David Liu,2020-05-29,Not Verified | Singapore to Taipei in Decembe...,Family Leisure,Economy Class,8,4.0,5.0,1.0,5.0,4.0,4,yes
3,singapore airlines,I Darnen,2020-05-18,✅ Trip Verified | 15th March I was meant to f...,Couple Leisure,Economy Class,1,,,,,,1,no
4,singapore airlines,S Tan,2020-04-25,✅ Trip Verified | Despicable handling of tick...,Solo Leisure,Economy Class,3,,,,,,3,no


In [4]:
# Sanity check: display the last rows of the scraped DataFrame
df.tail()

Unnamed: 0,airline_name,author,date,content,type_traveller,cabin_flown,overall_rating,seat_comfort_rating,cabin_staff_rating,food_beverages_rating,inflight_entertainment_rating,ground_service_rating,value_money_rating,recommended
95,singapore airlines,J Karali,2019-11-09,✅ Trip Verified | Colombo to Sydney via Singa...,Couple Leisure,Economy Class,6,4,1,2,4,4,3,no
96,singapore airlines,Elaine Loh,2019-11-07,✅ Trip Verified | Singapore to Amsterdam. Gre...,Solo Leisure,Business Class,10,5,5,5,4,5,4,yes
97,singapore airlines,Elaine Loh,2019-11-06,✅ Trip Verified | Perth to Singapore. A pleas...,Solo Leisure,Economy Class,9,4,5,4,4,5,5,yes
98,singapore airlines,John Roder,2019-11-06,Not Verified | We flew Singapore Airlines busi...,Couple Leisure,Business Class,6,4,1,1,4,4,3,yes
99,singapore airlines,H Darvani,2019-11-05,"✅ Trip Verified | Frankfurt to New York, ever...",Solo Leisure,Economy Class,8,3,5,3,4,5,4,yes
