# Web scraping of review data from an airline website for sentiment analysis

In [1]:
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [3]:
import csv
import requests
from bs4 import BeautifulSoup

# Base URL of the paginated British Airways review listing; the page number
# and query string are appended to it for each request.
base_url = 'https://www.airlinequality.com/airline-reviews/british-airways/page/'

# Number of listing pages to fetch.
num_pages = 10

# Number of reviews requested per page (sent as the `pagesize` query parameter).
page_size = 1000

# Accumulates the text of every review found across all pages.
reviews = []

for page_num in range(1, num_pages + 1):
    url = base_url + str(page_num) + '/?sortby=post_date%3ADesc&pagesize=' + str(page_size)

    # Without a timeout a stalled connection would block this cell forever.
    response = requests.get(url, timeout=30)

    # An error response would otherwise be parsed as a page with zero reviews,
    # silently shrinking the dataset; report it and move on to the next page.
    if not response.ok:
        print('Skipping page', page_num, '- HTTP status', response.status_code)
        continue

    soup = BeautifulSoup(response.content, 'html.parser')

    # Each review body is taken from a <div class="text_content"> element.
    reviews.extend(
        review.get_text(strip=True)
        for review in soup.find_all('div', class_='text_content')
    )

    print('Scraped page', page_num)

# Write one review per row under a single 'Review' header column.
# newline='' lets the csv module control line endings itself. The explicit
# UTF-8 encoding keeps non-ASCII characters in the review text (currency
# symbols, typographic quotes, emoji) from failing or being mangled on
# platforms whose default encoding is not UTF-8.
with open('british-airways-reviews.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Review'])
    writer.writerows([review] for review in reviews)

print('Done writing reviews to CSV file')

Scraped page 1
Scraped page 2
Scraped page 3
Scraped page 4
Scraped page 5
Scraped page 6
Scraped page 7
Scraped page 8
Scraped page 9
Scraped page 10
Done writing reviews to CSV file


In [None]:
import csv
import requests
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Base URL of the British Airways review listing, the number of listing pages
# to fetch, and the number of reviews requested per page.
base_url = "https://www.airlinequality.com/airline-reviews/british-airways"
num_pages = 10
page_size = 100

# Initialize the VADER sentiment analyzer. Its constructor loads the
# 'vader_lexicon' NLTK resource and raises LookupError when that resource has
# not been downloaded yet, so fetch it once on demand and retry.
try:
    sia = SentimentIntensityAnalyzer()
except LookupError:
    import nltk

    nltk.download('vader_lexicon')
    sia = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Return the 'compound' polarity score of *text*.

    The score is read from the dictionary produced by the module-level
    analyzer ``sia`` via ``polarity_scores``.
    """
    return sia.polarity_scores(text)['compound']

# Each entry is a dict with the review 'text' and its 'sentiment' score.
reviews = []

# Scrape the reviews from each page
for i in range(1, num_pages + 1):
    url = f"{base_url}/page/{i}/?sortby=post_date%3ADesc&pagesize={page_size}"

    # Without a timeout a stalled connection would block this cell forever.
    response = requests.get(url, timeout=30)

    # An error response would otherwise be parsed as a page with zero reviews,
    # silently shrinking the dataset; report it and move on to the next page.
    if not response.ok:
        print('Skipping page', i, '- HTTP status', response.status_code)
        continue

    content = response.content

    # Parse the HTML content
    soup = BeautifulSoup(content, 'html.parser')

    # Each review body is taken from a <div class="text_content"> element;
    # score it and store the text together with its sentiment.
    for review in soup.find_all("div", {"class": "text_content"}):
        text = review.get_text().strip()
        sentiment = analyze_sentiment(text)
        reviews.append({'text': text, 'sentiment': sentiment})

# Write the reviews to a CSV file with 'text' and 'sentiment' columns.
# newline='' lets the csv module control line endings itself. The explicit
# UTF-8 encoding keeps non-ASCII characters in the review text from failing
# or being mangled on platforms whose default encoding is not UTF-8.
with open('reviews.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['text', 'sentiment']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # Every entry already carries exactly the 'text' and 'sentiment' keys,
    # so the dicts can be written directly.
    writer.writerows(reviews)
