In [1]:
# Import libraries

from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np

In [2]:
# Scrape one page of Skytrax reviews for Adria Airways, turn every review into
# a record (dict), assemble the records into a DataFrame and save it as CSV.


def _cell_after(review, header_class):
    """Return the text of the cell following a header cell inside one review.

    `review` is a single 'review-stats' element and `header_class` is the full
    class string of the header <td> to look for. Returns NaN when the review
    has no such header (or the header has no following sibling), so a missing
    row never borrows a value from another review.
    """
    header = review.find('td', {'class': header_class})
    if header is None:
        return np.nan
    value_cell = header.find_next_sibling()
    return value_cell.text if value_cell is not None else np.nan


def _star_ratings(review):
    """Return {rating name: highest filled star} for one 'review-stats' element.

    Header and stars are paired row by row, so rows without a stars cell are
    skipped instead of shifting the pairing. A stars cell without any filled
    star yields NaN.
    """
    ratings = {}
    for row in review.find_all('tr'):
        header = row.find('td', class_='review-rating-header')
        stars = row.find('td', class_='stars')
        if header is None or stars is None:
            continue
        filled = stars.find_all(class_='fill')
        # The last filled star carries the rating value as its text.
        ratings[header.text] = filled[-1].text if filled else np.nan
    return ratings


final_list = []

# Skytrax URL
url = 'https://www.airlinequality.com/airline-reviews/adria-airways/?sortby=post_date%3ADesc&pagesize=20'

# Request URL; fail loudly on an HTTP error instead of parsing an error page
response = requests.get(url, timeout=30)
response.raise_for_status()

# Create Soup
soup = BeautifulSoup(response.content, 'lxml')

# Page-level lookups are loop-invariant, so run them once.
# NOTE(review): these lists are matched to reviews purely by position.
# Presumably the first itemprop='name' element is not a reviewer and every
# review contributes two 'datePublished' elements - confirm against the page.
authors = soup.find_all(itemprop='name')[1:]
dates = soup.find_all(itemprop='datePublished')[::2]
overall_ratings = soup.find_all('div', {'itemprop': 'reviewRating'})
contents = soup.find_all('div', class_='text_content')

# One record per review-stats table
for idx, review in enumerate(soup.find_all('div', {'class': "review-stats"})):

    # Star-based ratings, keyed by the header text shown on the page
    review_dict = _star_ratings(review)

    # Author, Date, Overall_rating, Text_content (aligned by position)
    review_dict['Author'] = authors[idx].text
    review_dict['Date'] = dates[idx].attrs['content']
    review_dict['Overall_rating'] = overall_ratings[idx].find(itemprop="ratingValue").text
    review_dict['text_content'] = contents[idx].text

    # Recommended: last cell of the last row of this review's table
    review_dict['Recommended'] = review.find_all('tr')[-1].find_all('td')[-1].text

    # Type of traveller and seat type, looked up inside this review only
    review_dict['traveller_type'] = _cell_after(review, 'review-rating-header type_of_traveller')
    review_dict['seat_type'] = _cell_after(review, 'review-rating-header cabin_flown')

    # Append dictionary into a list
    final_list.append(review_dict)

# Scraped column -> output column, in output order
column_names = {
    'airline': 'airline_name',
    'Author': 'author',
    'Date': 'date',
    'text_content': 'content',
    'traveller_type': 'type_traveller',
    'seat_type': 'cabin_flown',
    'Overall_rating': 'overall_rating',
    'Seat Comfort': 'seat_comfort_rating',
    'Cabin Staff Service': 'cabin_staff_rating',
    'Food & Beverages': 'food_beverages_rating',
    'Inflight Entertainment': 'inflight_entertainment_rating',
    'Ground Service': 'ground_service_rating',
    'Value For Money': 'value_money_rating',
    'Recommended': 'recommended',
}

# Create DataFrame
df = pd.DataFrame(final_list)
df['airline'] = 'adria airways'

# Rearrange and rename. reindex (unlike df[[...]]) keeps a rating column that
# no review on this page carries as an all-NaN column instead of raising
# KeyError, so the CSV schema stays stable.
df = df.reindex(columns=list(column_names)).rename(columns=column_names)

# Save to CSV
df.to_csv('Data/adria_airways.csv', index=False)

In [3]:
# Sanity check: display the scraped review table (one row per review)
df

Unnamed: 0,airline_name,author,date,content,type_traveller,cabin_flown,overall_rating,seat_comfort_rating,cabin_staff_rating,food_beverages_rating,inflight_entertainment_rating,ground_service_rating,value_money_rating,recommended
0,adria airways,D Praetextatus,2019-09-28,Not Verified | Please do a favor yourself and...,Solo Leisure,Economy Class,1,1,1,,,1,1,no
1,adria airways,D Meijer,2019-09-24,✅ Trip Verified | Do not book a flight with th...,Couple Leisure,Economy Class,1,1,1,1.0,1.0,1,1,no
2,adria airways,Herbse Mayer,2019-09-17,✅ Trip Verified | Had very bad experience wit...,Couple Leisure,Economy Class,1,1,1,1.0,1.0,1,1,no
3,adria airways,D Bole,2019-09-06,"Not Verified | Ljubljana to Zürich. Firstly, ...",Business,Economy Class,1,1,1,1.0,,1,1,no
4,adria airways,B Cosmin,2019-08-24,"Not Verified | First of all, I am not complai...",Solo Leisure,Economy Class,1,1,1,1.0,1.0,1,1,no
5,adria airways,Thomas Gloor,2019-08-06,✅ Trip Verified | Worst Airline ever! They co...,Solo Leisure,Economy Class,1,1,2,1.0,1.0,1,1,no
6,adria airways,M Jager,2018-10-12,✅ Trip Verified | Ljubljana to Munich. The hom...,Family Leisure,Economy Class,8,4,4,3.0,,5,5,yes
7,adria airways,Giulia Rossi,2018-10-05,Not Verified | Zurich to Ljubljana. Very poor...,Business,Economy Class,1,2,1,,1.0,1,1,no
8,adria airways,Galya Slavov,2018-07-29,✅ Trip Verified | Vienna to Sofia. The flight...,Family Leisure,Economy Class,1,4,1,1.0,,4,1,no
9,adria airways,Loic Jouan,2018-07-19,✅ Trip Verified | We were traveling from Pari...,Solo Leisure,Economy Class,2,3,3,,,3,2,no


In [4]:
# Show the final (renamed) column labels of the review table
df.columns

Index(['airline_name', 'author', 'date', 'content', 'type_traveller',
       'cabin_flown', 'overall_rating', 'seat_comfort_rating',
       'cabin_staff_rating', 'food_beverages_rating',
       'inflight_entertainment_rating', 'ground_service_rating',
       'value_money_rating', 'recommended'],
      dtype='object')