In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [11]:
# Function to extract review details
def extract_review(review):
    review_dict = {}

    # Extract the date
    try:
        review_dict["date"] = review.find("meta").get("content")
    except AttributeError:
        review_dict["date"] = None

    # Extract the header
    header = review.find("h2", {"class": "text_header"})
    review_dict["header"] = header.get_text() if header else None

    # Extract the rating
    rating = review.find("div", {"class": "rating-10"})
    review_dict["rating"] = None  # Set a default value of None for rating
    if rating:
        span = rating.find("span")
        if span:
            review_dict["rating"] = span.get_text()

    # Extract the content
    content = review.find("div", {"class": "text_content"})
    review_dict["content"] = content.get_text().strip() if content else None

    # Extract additional ratings from the table
    table = review.find("table", {"class": "review-ratings"})
    if table:
        data = table.find_all("td")
        keys = data[::2]
        values = data[1::2]

        for key, value in zip(keys, values):
            key_text = key.get_text()
            star_value = None

            try:
                star_value = value.find_all("span", {"class": "star fill"})[-1]
            except IndexError:
                pass

            if star_value:
                review_dict[key_text] = star_value.get_text()
            else:
                review_dict[key_text] = value.get_text() if value else None

    return review_dict


In [12]:
import re

# Function to get total number of pages
def get_total_pages(url):
    response = requests.get(url)
    content = BeautifulSoup(response.content, "html.parser")
    
    # Find the total number of pages by looking for a page navigation element
    pagination = content.find("div", {"class": "pagination-total"})
    if pagination:
        total_pages_text = pagination.get_text(strip=True)
        # Extract only numbers from the text using regular expressions
        total_pages = re.findall(r'\d+', total_pages_text)
        if total_pages:
            return int(total_pages[-1])  # Return the last number (total pages)
    return 1

In [13]:
# Function to scrape all pages
def get_all_reviews(airline_url):
    reviews_list = []
    total_pages = get_total_pages(airline_url)  # Get the total number of pages
    print(f"Total pages found: {total_pages}")

    for page in range(1, total_pages + 1):
        # Request each page's URL
        response = requests.get(f"{airline_url}page/{page}/")
        content = BeautifulSoup(response.content, "html.parser")
        
        # Find all review articles
        content_reviews = content.find_all("article", class_=lambda value: value and value.startswith("review-"))

        if not content_reviews:  # Stop if no more reviews
            break

        # Loop through reviews and extract data
        for review in content_reviews:
            review_data = extract_review(review)
            reviews_list.append(review_data)

        print(f"Scraped page {page} of {total_pages}")

    # Convert list of dictionaries to pandas DataFrame
    reviews_df = pd.DataFrame(reviews_list)

    # Filter out rows where 'header' and 'rating' are None (e.g., 'Air New Zealand Photos')
    reviews_df = reviews_df.dropna(subset=['header', 'rating'])

    return reviews_df


In [14]:
# Define the URL for Air New Zealand reviews
airline_url = "https://www.airlinequality.com/airline-reviews/air-new-zealand/"

# Scrape all reviews across pages
reviews_df = get_all_reviews(airline_url)

# Display the DataFrame
print(reviews_df)


Total pages found: 865
Scraped page 1 of 865
Scraped page 2 of 865
Scraped page 3 of 865
Scraped page 4 of 865
Scraped page 5 of 865
Scraped page 6 of 865
Scraped page 7 of 865
Scraped page 8 of 865
Scraped page 9 of 865
Scraped page 10 of 865
Scraped page 11 of 865
Scraped page 12 of 865
Scraped page 13 of 865
Scraped page 14 of 865
Scraped page 15 of 865
Scraped page 16 of 865
Scraped page 17 of 865
Scraped page 18 of 865
Scraped page 19 of 865
Scraped page 20 of 865
Scraped page 21 of 865
Scraped page 22 of 865
Scraped page 23 of 865
Scraped page 24 of 865
Scraped page 25 of 865
Scraped page 26 of 865
Scraped page 27 of 865
Scraped page 28 of 865
Scraped page 29 of 865
Scraped page 30 of 865
Scraped page 31 of 865
Scraped page 32 of 865
Scraped page 33 of 865
Scraped page 34 of 865
Scraped page 35 of 865
Scraped page 36 of 865
Scraped page 37 of 865
Scraped page 38 of 865
Scraped page 39 of 865
Scraped page 40 of 865
Scraped page 41 of 865
Scraped page 42 of 865
Scraped page 43 of 8

In [16]:
# Save the scraped reviews to a CSV file
reviews_df.to_csv("air_new_zealand_reviews.csv", index=False)
reviews_df

Unnamed: 0,date,header,rating,content,Type Of Traveller,Seat Type,Route,Date Flown,Seat Comfort,Cabin Staff Service,Ground Service,Value For Money,Recommended,Aircraft,Food & Beverages,Inflight Entertainment,Wifi & Connectivity
0,2024-08-19,"""getting worse by the day""",3,Not Verified | Air New Zealand domestic servi...,Business,Economy Class,Auckland to Napier,August 2024,4,5,5,1,no,,,,
1,2024-08-13,"""never fly Air New Zealand again""",1,✅ Trip Verified | Air New Zealand bumped us ...,Family Leisure,Economy Class,Napier to Auckland,June 2024,3,4,1,1,no,ATR-72,,,
2,2024-07-29,"""has been going downhill""",1,Not Verified | Very unfortunately the servic...,Solo Leisure,Business Class,San Francisco to Queenstown via Auckland,August 2023,2,3,1,2,no,,2,2,1
3,2024-07-19,"""Cabin crew and aircraft were great""",10,✅ Trip Verified | Check-in was efficient; boa...,Solo Leisure,Economy Class,Auckland to Hobart,December 2023,4,5,4,5,yes,A320N,4,,
4,2024-07-07,"""Such ugly customer service""",1,Not Verified | Air NZ sold a connecting flig...,Family Leisure,Economy Class,,July 2024,,,,1,no,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
944,2013-05-08,Air New Zealand customer review,9,Flew Premium Economy from LAX to AKL on a 777-...,,Premium Economy,,,4,5,,5,yes,,5,5,
946,2013-05-08,Air New Zealand customer review,3,Leg room on Internal flights is getting smalle...,,Economy Class,,,1,4,,3,no,,3,,
947,2013-05-06,Air New Zealand customer review,5,Adelaide to Auckland April 28 2013. Usual incr...,,Economy Class,,,3,3,,3,no,,4,1,
948,2013-05-01,Air New Zealand customer review,5,I have been flying with Air New Zealand since ...,,Premium Economy,,,4,3,,2,no,,4,2,
