In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
import spacy
from textblob import TextBlob

In [2]:
def parser():
    with open("reviews.html", "r", encoding="utf-8") as file:
        html = file.read()

    soup = BeautifulSoup(html, 'html.parser')

    reviews = soup.find_all(class_='review-text-content')
    extracted_reviews = []
    for review in reviews:
        extracted_reviews.append(review.text)

    print(extracted_reviews)

    extracted_reviews = [extracted_review[1:-1] for extracted_review in extracted_reviews]

    with open('reviews.csv', mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # writer.writerow('reviews')
        for extracted_review in extracted_reviews:
            writer.writerow([extracted_review])

# parser()

In [3]:
def sentiment_analysis():
    def preprocessing_text(text):
        doc = sp(text.lower())
        tokens = [token for token in doc if not token.is_stop and not token.is_digit and not token.is_punct]
        tokens = [token.lemma_ for token in tokens]
        return ' '.join(tokens)

    def get_sentiment(text):
        return TextBlob(text).sentiment.polarity


    df = pd.read_csv('reviews.csv')    
    df = df.dropna()

    sp = spacy.load('en_core_web_sm')
    df['processed_reviews'] = df['reviews'].apply(preprocessing_text)

    df['polarity'] = df['processed_reviews'].apply(get_sentiment)


    sentiment_list = []
    pos = 0
    neg = 0
    neu = 0

    for num in df['polarity']:
        if num > 0:
            pos = pos + 1
            sentiment_list.append('Positive')

        elif num < 0:
            neg = neg + 1
            sentiment_list.append('Negative')

        else:
            neu = neu + 1
            sentiment_list.append('Neutral')

    df['sentiment'] = sentiment_list

    print("Total positive opinions:", pos)
    print("Total negative opinions:", neg)
    print("Total neutral opinions:", neu)

    print("Positive Sentiments:")
    print(df[df['sentiment'] == 'Positive'].head())

    print("\nNegative Sentiments:")
    print(df[df['sentiment'] == 'Negative'].head())

    print("\nNeutral Sentiments:")
    print(df[df['sentiment'] == 'Neutral'].head())

    df[df['sentiment'] == 'Negative'].to_csv('negative_reviews.csv', index=False)

    return df

    
df = sentiment_analysis()

Total positive opinions: 191
Total negative opinions: 29
Total neutral opinions: 27
Positive Sentiments:
                                             reviews  \
0  Title: "Pintola Pinneat Butter - A Five-Star N...   
1  Pintola Peanut Butter tastes brilliant especia...   
2  Taste is incredible. Like the fact that it doe...   
3  Pintola Unsweetened Crunchy peanut butter is a...   
4  Best peanut butter I tasted which is healthy a...   

                                   processed_reviews  polarity sentiment  
0  title pintola pinneat butter star nutty deligh...  0.469014  Positive  
1  pintola peanut butter taste brilliant especial...  0.283429  Positive  
2  taste incredible like fact thicken like artifi...  0.412000  Positive  
3  pintola unsweetene crunchy peanut butter winne...  0.538095  Positive  
4  good peanut butter taste healthy tasty bottle ...  0.464286  Positive  

Negative Sentiments:
                                               reviews  \
66     No artificial flavors

In [5]:
keyword_solutions = {
    'oil': 'It is the natural oil present in peanut. There is no extra oil added. Place the container upside down for sometime for the oil to mix.',
    'oily': 'It is the natural oil present in peanut. There is no extra oil added. Place the container upside down for sometime for the oil to mix.',
    'leak': 'Send images with proof to customer support for further assistance.',
    'falvour': 'Try our flavoured peanut butter range next time. This one does not have any additives.',
    'consistent': 'Mix well with a spoon.',
    'packag': 'Send images with proof to customer support for further assistance.',
    'falvour': 'Try our flavoured peanut butter range next time. This one does not have any additives.',
    'consistency': 'Mix well with a spoon.',
    'expire': 'Refrigerate for long lasting freshness.'
}

negative_reviews = df[df['polarity'] < 0]

def find_solution(text):
    for keyword, solution in keyword_solutions.items():
        if keyword.lower() in text.lower():
            return solution
    return None

negative_reviews['solution'] = negative_reviews['processed_reviews'].apply(find_solution)

negative_reviews = negative_reviews.dropna(subset=['solution'])

negative_reviews[['reviews', 'solution']].to_csv('negative_reviews_with_solutions.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  negative_reviews['solution'] = negative_reviews['processed_reviews'].apply(find_solution)
