# GAP

In [7]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Base URL of the website with reviews
base_url = 'https://www.trustpilot.com/review/www.gap.com'

# Initialize an empty list to store the extracted data
reviews_data = []

# Function to scrape reviews from a specific page
def scrape_reviews(page_number):
    if page_number == 1:
        url = base_url  # First page doesn't have the `?page=` parameter
    else:
        url = f"{base_url}?page={page_number}"  # Other pages have `?page=`
        
    # Send a GET request to fetch the HTML content
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve page {page_number}: {response.status_code}")
        return
    
    html_content = response.content
    soup = BeautifulSoup(html_content, 'lxml')

    # Find all review sections
    reviews = soup.find_all('section', class_='styles_reviewContentwrapper__zH_9M')

    # Loop through each review section and extract the details
    for review in reviews:
        try:
            # Extract the rating
            rating = review.find('div', class_='styles_reviewHeader__iU9Px')['data-service-review-rating']
        except (TypeError, AttributeError):
            rating = None  # Set rating to None if extraction fails

        try:
            # Extract the review title
            review_title = review.find('h2', {'data-service-review-title-typography': 'true'}).text.strip()
        except (AttributeError, TypeError):
            review_title = None  # Set title to None if extraction fails

        try:
            # Extract the review content
            review_content = review.find('p', {'data-service-review-text-typography': 'true'}).text.strip()
        except (AttributeError, TypeError):
            review_content = None  # Set content to None if extraction fails

        try:
            # Extract the date of experience
            date_of_experience = review.find('p', {'data-service-review-date-of-experience-typography': 'true'}).text.split(':')[-1].strip()
        except (AttributeError, TypeError):
            date_of_experience = None  # Set date to None if extraction fails

        # Append the extracted data to the reviews_data list
        reviews_data.append({
            'Rating': rating,
            'Review Title': review_title,
            'Review Content': review_content,
            'Date of Experience': date_of_experience
        })

# Scrape reviews from all pages (1 to 25)
for page in range(1, 26):
    scrape_reviews(page)

# Convert the reviews data into a pandas DataFrame
df_gap = pd.DataFrame(reviews_data)

# Display the DataFrame
print(f"Total reviews scraped: {len(df_gap)}")

# Optionally, save the DataFrame to a CSV file
#df.to_csv('scraped_reviews.csv', index=False)


Total reviews scraped: 487


In [8]:
df_gap.head()

Unnamed: 0,Rating,Review Title,Review Content,Date of Experience
0,1,WARNING WARNING WARNING,WARNING WARNING WARNING — DO NOT ORDER FROM GA...,"September 22, 2024"
1,1,Carrier will leave your package anywhere,Order from them they used some other carrier n...,"September 24, 2024"
2,1,Absolutely terrible online service in…,Absolutely terrible online service in addition...,"August 13, 2024"
3,5,"GAP Bridgewater, NJ team is AWESOME","GAP Bridgewater, NJ team is AWESOME. They hel...","August 10, 2024"
4,1,terrible customer service,the customer service people at Gap couldn't sp...,"May 29, 2024"


# Old NAvy

In [15]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Base URL of the website with reviews
base_url = 'https://www.trustpilot.com/review/oldnavy.ca'

# Initialize an empty list to store the extracted data
reviews_data = []

# Function to scrape reviews from a specific page
def scrape_reviews(page_number):
    if page_number == 1:
        url = base_url  # First page doesn't have the `?page=` parameter
    else:
        url = f"{base_url}?page={page_number}"  # Other pages have `?page=`

    #print(f"Scraping page {page_number}...")
    
    # Send a GET request to fetch the HTML content
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve page {page_number}: {response.status_code}")
        return
    
    html_content = response.content
    soup = BeautifulSoup(html_content, 'lxml')

    # Find all review sections
    reviews = soup.find_all('section', class_='styles_reviewContentwrapper__zH_9M')

    # Loop through each review section and extract the details
    for review in reviews:
        try:
            # Extract the rating
            rating = review.find('div', class_='styles_reviewHeader__iU9Px')['data-service-review-rating']
        except (TypeError, AttributeError):
            rating = None  # Set rating to None if extraction fails

        try:
            # Extract the review title
            review_title = review.find('h2', {'data-service-review-title-typography': 'true'}).text.strip()
        except (AttributeError, TypeError):
            review_title = None  # Set title to None if extraction fails

        try:
            # Extract the review content
            review_content = review.find('p', {'data-service-review-text-typography': 'true'}).text.strip()
        except (AttributeError, TypeError):
            review_content = None  # Set content to None if extraction fails

        try:
            # Extract the date of experience
            date_of_experience = review.find('p', {'data-service-review-date-of-experience-typography': 'true'}).text.split(':')[-1].strip()
        except (AttributeError, TypeError):
            date_of_experience = None  # Set date to None if extraction fails

        # Append the extracted data to the reviews_data list
        reviews_data.append({
            'Rating': rating,
            'Review Title': review_title,
            'Review Content': review_content,
            'Date of Experience': date_of_experience
        })

# Scrape reviews from all pages (1 to 25)
for page in range(1, 23):
    scrape_reviews(page)

# Convert the reviews data into a pandas DataFrame
df_old_navy = pd.DataFrame(reviews_data)

# Display the DataFrame
print(f"Total reviews scraped: {len(df_old_navy)}")

# Optionally, save the DataFrame to a CSV file
#df.to_csv('scraped_reviews.csv', index=False)

Total reviews scraped: 421


In [27]:
df.head()

Unnamed: 0,Rating,Review Title,Review Content,Date of Experience
0,5,Affordable clothes,Great clothes and customer service. I DONT kno...,"August 20, 2024"
1,1,Old Navy uses OnTrac/Lasership for…,Old Navy uses OnTrac/Lasership for deliver. Th...,"August 02, 2024"
2,1,If 1 could give 0 stars I would,If 1 could give 0 stars I would. per old navy ...,"August 19, 2024"
3,1,STOP USING LASERSHIP/ONTRAC,Old Navy’s products are fine for the price. Th...,"August 02, 2024"
4,1,Ordered a bathing suit for my…,Ordered a bathing suit for my granddaughter. ...,"July 20, 2024"


# Banana Republic

In [19]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Base URL of the website with reviews
base_url = 'https://www.trustpilot.com/review/bananarepublic.eu'

# Initialize an empty list to store the extracted data
reviews_data = []

# Function to scrape reviews from a specific page
def scrape_reviews(page_number):
    if page_number == 1:
        url = base_url  # First page doesn't have the `?page=` parameter
    else:
        url = f"{base_url}?page={page_number}"  # Other pages have `?page=`

    #print(f"Scraping page {page_number}...")
    
    # Send a GET request to fetch the HTML content
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve page {page_number}: {response.status_code}")
        return
    
    html_content = response.content
    soup = BeautifulSoup(html_content, 'lxml')

    # Find all review sections
    reviews = soup.find_all('section', class_='styles_reviewContentwrapper__zH_9M')

    # Loop through each review section and extract the details
    for review in reviews:
        try:
            # Extract the rating
            rating = review.find('div', class_='styles_reviewHeader__iU9Px')['data-service-review-rating']
        except (TypeError, AttributeError):
            rating = None  # Set rating to None if extraction fails

        try:
            # Extract the review title
            review_title = review.find('h2', {'data-service-review-title-typography': 'true'}).text.strip()
        except (AttributeError, TypeError):
            review_title = None  # Set title to None if extraction fails

        try:
            # Extract the review content
            review_content = review.find('p', {'data-service-review-text-typography': 'true'}).text.strip()
        except (AttributeError, TypeError):
            review_content = None  # Set content to None if extraction fails

        try:
            # Extract the date of experience
            date_of_experience = review.find('p', {'data-service-review-date-of-experience-typography': 'true'}).text.split(':')[-1].strip()
        except (AttributeError, TypeError):
            date_of_experience = None  # Set date to None if extraction fails

        # Append the extracted data to the reviews_data list
        reviews_data.append({
            'Rating': rating,
            'Review Title': review_title,
            'Review Content': review_content,
            'Date of Experience': date_of_experience
        })

# Scrape reviews from all pages (1 to 25)
for page in range(1, 8):
    scrape_reviews(page)

# Convert the reviews data into a pandas DataFrame
df_banana_republic = pd.DataFrame(reviews_data)

# Display the DataFrame
print(f"Total reviews scraped: {len(df_banana_republic)}")

# Optionally, save the DataFrame to a CSV file
#df.to_csv('scraped_reviews.csv', index=False)

Total reviews scraped: 124


In [29]:
df_banana_republic.head()

Unnamed: 0,Rating,Review Title,Review Content,Date of Experience
0,1,I am trying to return items I ordered…,I am trying to return items I ordered from Ban...,"September 23, 2024"
1,1,Please don't order from this company,Please don't order from this company. I wish I...,"September 16, 2024"
2,1,"Shapeless, happless mass","I used to love Banana Republic, bought many of...","August 23, 2024"
3,1,Sends garbage as final sale,I purchased two final sale items from Banana R...,"August 26, 2024"
4,4,I used to work in the Banana Republic…,I used to work in the Banana Republic in Marke...,"August 30, 2024"


In [5]:
import azure

In [11]:
import requests
import time
import pandas as pd
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Azure API credentials
endpoint = "https://dhairya1899.cognitiveservices.azure.com/"
key = "79f35fa73b5c4ff08a284193f18068cf"

# Authenticate client
def authenticate_client():
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=ta_credential)
    return text_analytics_client

client = authenticate_client()

# Function for sentiment analysis
def sentiment_analysis_example(client, review):
    documents = [review]
    response = client.analyze_sentiment(documents=documents)[0]
    sentiments = []
    for sentence in response.sentences:
        sentiments.append(sentence.sentiment)
    return sentiments[0]  # Get the overall sentiment of the first sentence

# Initialize rate limiting variables
requests_per_second_limit = 100
requests_per_minute_limit = 300
time_start = time.time()

# Function to handle rate limiting
def handle_rate_limit(request_count, start_time, per_second_limit, per_minute_limit):
    elapsed_time = time.time() - start_time
    if request_count % per_second_limit == 0:
        if elapsed_time < 1:
            time.sleep(1 - elapsed_time)
    if request_count % per_minute_limit == 0:
        if elapsed_time < 60:
            time.sleep(60 - elapsed_time)
    return time.time()

# Perform sentiment analysis with rate limiting
azure_sentiments = []
for i, review in enumerate(df_gap['Review Content']):
    if pd.notna(review):
        sentiment = sentiment_analysis_example(client, review)
        azure_sentiments.append(sentiment)
        
        # Handle API rate limit
        time_start = handle_rate_limit(i + 1, time_start, requests_per_second_limit, requests_per_minute_limit)
    else:
        azure_sentiments.append(None)

# Add the sentiment results back to the DataFrame
df_gap['Sentiment'] = azure_sentiments

# Display the updated DataFrame
print(df_gap)


    Rating                                 Review Title  \
1        1     Carrier will leave your package anywhere   
2        1       Absolutely terrible online service in…   
3        5         GAP Bridgewater, NJ  team is AWESOME   
4        1                    terrible customer service   
..     ...                                          ...   
482      4                                   good stuff   
483      5                          Nothing but ok.....   
484      5                        Best prices for Jeans   
485      1  They say they "can't" and they mean "won't"   
486      1                               don't go there   

                                        Review Content  Date of Experience  \
1    Order from them they used some other carrier n...  September 24, 2024   
2    Absolutely terrible online service in addition...     August 13, 2024   
3    GAP Bridgewater, NJ  team is AWESOME. They hel...     August 10, 2024   
4    the customer service people at Ga

In [26]:
sentiment_summary_gap = df_gap['Sentiment'].value_counts()
print(sentiment_summary_gap)

Sentiment
negative    287
neutral     104
positive     85
Name: count, dtype: int64


In [16]:
import requests
import time
import pandas as pd
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Azure API credentials
endpoint = "https://dhairya1899.cognitiveservices.azure.com/"
key = "79f35fa73b5c4ff08a284193f18068cf"

# Authenticate client
def authenticate_client():
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=ta_credential)
    return text_analytics_client

client = authenticate_client()

# Function for sentiment analysis
def sentiment_analysis_example(client, review):
    documents = [review]
    response = client.analyze_sentiment(documents=documents)[0]
    sentiments = []
    for sentence in response.sentences:
        sentiments.append(sentence.sentiment)
    return sentiments[0]  # Get the overall sentiment of the first sentence

# Initialize rate limiting variables
requests_per_second_limit = 100
requests_per_minute_limit = 300
time_start = time.time()

# Function to handle rate limiting
def handle_rate_limit(request_count, start_time, per_second_limit, per_minute_limit):
    elapsed_time = time.time() - start_time
    if request_count % per_second_limit == 0:
        if elapsed_time < 1:
            time.sleep(1 - elapsed_time)
    if request_count % per_minute_limit == 0:
        if elapsed_time < 60:
            time.sleep(60 - elapsed_time)
    return time.time()

# Perform sentiment analysis with rate limiting
azure_sentiments = []
for i, review in enumerate(df_old_navy['Review Content']):
    if pd.notna(review):
        sentiment = sentiment_analysis_example(client, review)
        azure_sentiments.append(sentiment)
        
        # Handle API rate limit
        time_start = handle_rate_limit(i + 1, time_start, requests_per_second_limit, requests_per_minute_limit)
    else:
        azure_sentiments.append(None)

# Add the sentiment results back to the DataFrame
df_old_navy['Sentiment'] = azure_sentiments

# Display the updated DataFrame
print(df_old_navy.head())

  Rating                         Review Title  \
0      5                   Affordable clothes   
1      1  Old Navy uses OnTrac/Lasership for…   
2      1      If 1 could give 0 stars I would   
3      1          STOP USING LASERSHIP/ONTRAC   
4      1       Ordered a bathing suit for my…   

                                      Review Content Date of Experience  \
0  Great clothes and customer service. I DONT kno...    August 20, 2024   
1  Old Navy uses OnTrac/Lasership for deliver. Th...    August 02, 2024   
2  If 1 could give 0 stars I would. per old navy ...    August 19, 2024   
3  Old Navy’s products are fine for the price. Th...    August 02, 2024   
4  Ordered a bathing suit for my granddaughter.  ...      July 20, 2024   

  Sentiment  
0  positive  
1   neutral  
2  negative  
3  positive  
4  positive  


In [17]:
sentiment_summary_old_navy = df_old_navy['Sentiment'].value_counts()
print(sentiment_summary_old_navy)

Sentiment
negative    252
neutral     115
positive     49
Name: count, dtype: int64


In [20]:
import requests
import time
import pandas as pd
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Azure API credentials
endpoint = "https://dhairya1899.cognitiveservices.azure.com/"
key = "79f35fa73b5c4ff08a284193f18068cf"

# Authenticate client
def authenticate_client():
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=ta_credential)
    return text_analytics_client

client = authenticate_client()

# Function for sentiment analysis
def sentiment_analysis_example(client, review):
    documents = [review]
    response = client.analyze_sentiment(documents=documents)[0]
    sentiments = []
    for sentence in response.sentences:
        sentiments.append(sentence.sentiment)
    return sentiments[0]  # Get the overall sentiment of the first sentence

# Initialize rate limiting variables
requests_per_second_limit = 100
requests_per_minute_limit = 300
time_start = time.time()

# Function to handle rate limiting
def handle_rate_limit(request_count, start_time, per_second_limit, per_minute_limit):
    elapsed_time = time.time() - start_time
    if request_count % per_second_limit == 0:
        if elapsed_time < 1:
            time.sleep(1 - elapsed_time)
    if request_count % per_minute_limit == 0:
        if elapsed_time < 60:
            time.sleep(60 - elapsed_time)
    return time.time()

# Perform sentiment analysis with rate limiting
azure_sentiments = []
for i, review in enumerate(df_banana_republic['Review Content']):
    if pd.notna(review):
        sentiment = sentiment_analysis_example(client, review)
        azure_sentiments.append(sentiment)
        
        # Handle API rate limit
        time_start = handle_rate_limit(i + 1, time_start, requests_per_second_limit, requests_per_minute_limit)
    else:
        azure_sentiments.append(None)

# Add the sentiment results back to the DataFrame
df_banana_republic['Sentiment'] = azure_sentiments

# Display the updated DataFrame
print(df_banana_republic.head())

  Rating                            Review Title  \
0      1  I am trying to return items I ordered…   
1      1    Please don't order from this company   
2      1                Shapeless, happless mass   
3      1             Sends garbage as final sale   
4      4  I used to work in the Banana Republic…   

                                      Review Content  Date of Experience  \
0  I am trying to return items I ordered from Ban...  September 23, 2024   
1  Please don't order from this company. I wish I...  September 16, 2024   
2  I used to love Banana Republic, bought many of...     August 23, 2024   
3  I purchased two final sale items from Banana R...     August 26, 2024   
4  I used to work in the Banana Republic in Marke...     August 30, 2024   

  Sentiment  
0   neutral  
1  negative  
2  positive  
3   neutral  
4   neutral  


In [21]:
sentiment_banana_republic = df_banana_republic['Sentiment'].value_counts()
print(sentiment_banana_republic)

Sentiment
negative    69
neutral     30
positive    24
Name: count, dtype: int64


In [38]:
# Count the occurrences of each sentiment
print(f'Sentiment Score Old Navy')
sentiment_counts = df_old_navy['Sentiment'].value_counts()
# Calculate total counts
total_counts = sentiment_counts.sum()
# Calculate percentages
sentiment_percentages = (sentiment_counts / total_counts) * 100
# Print the percentages
print(sentiment_percentages)

Sentiment Score Old Navy
Sentiment
negative    60.576923
neutral     27.644231
positive    11.778846
Name: count, dtype: float64
