In [None]:
!pip install transformers

# Scraping data function

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

def get_reviews(url, timeout=10): #returns list
    """Download a page and return the text of its ``div.description`` elements.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout ``requests.get`` can block indefinitely.

    Returns
    -------
    list of str
        The ``.text`` of every ``<div class="description">`` element, in
        document order. Empty when the page contains no such element.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts, or a 4xx/5xx HTTP response.
    """
    result = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page and
    # silently returning an empty list.
    result.raise_for_status()

    soup = BeautifulSoup(result.content, 'lxml')
    comments = soup.find_all("div", {"class": "description"})

    return [comment.text for comment in comments]

In [None]:
# Data scraping: download every review page and store the texts in CSV_OUT.csv
import os

# Check if the CSV file already exists
if os.path.exists('CSV_OUT.csv'):
    # The file is written below with the DataFrame index as its first column,
    # so read that column back as the index. Without index_col every reload
    # would add another 'Unnamed: 0' column.
    df = pd.read_csv('CSV_OUT.csv', index_col=0)
else:
    df = pd.DataFrame()  # Start from an empty DataFrame (no columns yet)

# Gather the texts of all pages first and build one DataFrame at the end:
# DataFrame.append was removed in pandas 2.0.
new_reviews = []
for i in range(13): #13 review page
    time.sleep(1)  # pause between consecutive requests
    reviews = get_reviews("https://www.chickadvisor.com/item/quality-street-chocolates/?page={0}#reviews".format(i+1))
    new_reviews.extend(reviews)
    print(f"Page {i+1} has been scrapped successfully")

# ignore_index=True renumbers the rows 0..n-1 (same effect as reset_index(drop=True)).
# NOTE(review): rows loaded from an existing CSV are kept, so re-running this
# cell stores the same reviews again - confirm whether that is intended.
df = pd.concat([df, pd.DataFrame({'Review': new_reviews})], ignore_index=True)
df.to_csv('CSV_OUT.csv')

# Sentiment analysis and zero-shot classification

In [None]:
from transformers import pipeline

# Name the checkpoints explicitly instead of relying on the library's implicit
# defaults, so results stay reproducible if those defaults change.
# NOTE(review): these are believed to be the checkpoints transformers falls
# back to for each task when no model is given - confirm against the
# "No model was supplied, defaulted to ..." warning of the installed version.
sentimentanalyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
aspectclassifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [7]:
#aspect:rank
# The keys are the candidate labels handed to the zero-shot classifier.
aspects = {
            'Taste':[0,0], 'Packaging':[0,0], 'Price':[0,0]
}
products_aspect_dict = {}

# Add one positive and one negative score column per aspect.
# They are initialised with float zeros because the scoring loop stores
# float products in them; starting from integer 0 would create int64 columns,
# and writing floats into those is deprecated in recent pandas.
for aspect in aspects:
    df[f'{aspect}POS'] = 0.0
    df[f'{aspect}NEG'] = 0.0


In [None]:
# Score every review: for each aspect label store
# (sentiment score * aspect score) in the POS or NEG column that matches the
# sentiment label of the review, and 0 in the other one.
for index, row in df.iterrows():
    reviewId = index
    review = row['Review']

    # A blank field is read back from the CSV as NaN (a float), which cannot
    # be sliced, and the classifier rejects an empty sequence. Skip such rows
    # and leave their score columns untouched.
    if not isinstance(review, str) or not review.strip():
        print(f"Review ID: {reviewId}")
        print("Skipped: empty or non-text review")
        print('-'*100)
        continue

    # Only the first 512 characters are passed to the models.
    text = review[:512]

    aspect_out = aspectclassifier(text, candidate_labels=list(aspects.keys()))
    # aspect_out INPUT OUTPUT SAMPLE:
    # sequence = "Who are you voting for in 2020?"
    # candidate_labels = ["politics", "public health", "economics"]
    #
    # classifier(sequence, candidate_labels)
    # --------------------------------------------------------------
    # {'labels': ['politics', 'economics', 'public health'],
    #  'scores': [0.972518801689148, 0.01458414364606142, 0.012897025793790817],
    #  'sequence': 'Who are you voting for in 2020?'}

    sentiment_out = sentimentanalyzer(text)
    # sentiment_out INPUT OUTPUT SAMPLE:
    # sentiment_pipeline = pipeline("sentiment-analysis")
    # data = ["I love you", "I hate you"]
    # ----------------------------------------------------
    # [{'label': 'POSITIVE', 'score': 0.9998},
    #  {'label': 'NEGATIVE', 'score': 0.9991}]

    print(f"Review ID: {reviewId}")
    print(f"Text: {text}")
    print(f"Dictionary: {sentiment_out}")

    # A single input yields a one-element list; read it once.
    sentiment_label = sentiment_out[0]['label']
    sentiment_score = sentiment_out[0]['score']

    for aspect, aspect_score in zip(aspect_out['labels'], aspect_out['scores']):
        if sentiment_label == 'POSITIVE':
            df.loc[index, f'{aspect}POS'] = sentiment_score * aspect_score
            df.loc[index, f'{aspect}NEG'] = 0

        elif sentiment_label == 'NEGATIVE':
            df.loc[index, f'{aspect}POS'] = 0
            df.loc[index, f'{aspect}NEG'] = sentiment_score * aspect_score

        print(f"Aspect: {aspect}, aspect_scores: {aspect_score}")

    #exporting file
    # The whole frame is rewritten after every review.
    # NOTE(review): presumably a checkpoint so partial results survive an
    # interruption - confirm, otherwise write once after the loop.
    df.to_csv('CSV_OUT.csv')
    print('-'*100)