## Web Scraping Customer Reviews of British Airways
We will use a package called `BeautifulSoup` to collect the data from the web. Once the data has been collected and saved to a local `.csv` file, the analysis can begin.

In [3]:
from pathlib import Path    # for filesystem paths

import pandas as pd # for data manipulation
import requests     # for making HTTP requests
from bs4 import BeautifulSoup   # for parsing HTML

In [4]:
base_url = "https://www.airlinequality.com/airline-reviews/british-airways"  # URL to scrape from
pages = 10  # Number of pages to scrape
page_size = 100 # Number of reviews requested per page
request_timeout = 30    # Seconds to wait for the server before giving up on a page

reviews = []    # Text of every review collected so far, in page order

# Walk the paginated review listing and collect the review text from each page
for i in range(1, pages + 1):

    print(f"Scraping page {i}")

    # Build the URL of page i of the listing, newest reviews first
    url = f"{base_url}/page/{i}/?sortby=post_date%3ADesc&pagesize={page_size}"

    # Without a timeout a stalled connection would block this cell forever
    response = requests.get(url, timeout=request_timeout)

    # Stop on HTTP errors (4xx/5xx) rather than parsing an error page,
    # which would silently contribute zero reviews
    response.raise_for_status()

    # Parse the page and keep the text of every <div class="text_content">
    content = response.content
    parsed_content = BeautifulSoup(content, 'html.parser')
    reviews.extend(
        para.get_text()
        for para in parsed_content.find_all("div", {"class": "text_content"})
    )

    print(f"   ---> {len(reviews)} total reviews")

Scraping page 1
   ---> 100 total reviews
Scraping page 2
   ---> 200 total reviews
Scraping page 3
   ---> 300 total reviews
Scraping page 4
   ---> 400 total reviews
Scraping page 5
   ---> 500 total reviews
Scraping page 6
   ---> 600 total reviews
Scraping page 7
   ---> 700 total reviews
Scraping page 8
   ---> 800 total reviews
Scraping page 9
   ---> 900 total reviews
Scraping page 10
   ---> 1000 total reviews


In [5]:
# Start from an empty DataFrame and attach the scraped texts as its "reviews" column
df = pd.DataFrame().assign(reviews=reviews)
df.head()   # Preview the first 5 rows

Unnamed: 0,reviews
0,"Not Verified | Before my flight, I was forced ..."
1,✅ Trip Verified | British Airways at its bes...
2,✅ Trip Verified | An excellent flight! Despite...
3,✅ Trip Verified | I recently traveled with Bri...
4,✅ Trip Verified | My family and I were booke...


In [6]:
# Save the DataFrame to a CSV file so later analysis can load it locally
output_path = Path("data/BA_reviews.csv")

# to_csv does not create missing folders, so make sure the target directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)

# The index is written too (to_csv default), as an unnamed first column
df.to_csv(output_path)