In [82]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import MinMaxScaler

In [83]:
def calculate_helpfulness(review_text, product_keywords):
    # step 1: Preprocessing
    review_tokens = word_tokenize(review_text)
    review_tokens = [t for t in review_tokens if t.lower(
    ) not in nltk.corpus.stopwords.words('english')]

    # step 2: Positivity scores
    sia = SentimentIntensityAnalyzer()
    positivity_score = sia.polarity_scores(review_text)['compound']

    # step 3: Relevance Scores
    tfidf_vectorizer = TfidfVectorizer()
    review_vector = tfidf_vectorizer.fit_transform([review_text])
    product_vector = tfidf_vectorizer.transform(product_keywords)
    relevance_score = cosine_similarity(review_vector, product_vector)[0][0]

    # step 4: Informative Scores
    informative_score = len(set(review_tokens))

    # step 5: length score
    length_score = len(review_text)

    # Normalization
    scaler = MinMaxScaler()
    informative_score_normalized = scaler.fit_transform([[informative_score]])[0][0]
    length_score_normalized = scaler.fit_transform([[length_score]])[0][0]

    # step 6: helpfulness score
    helpfulness_score = (positivity_score * 0.4) + (relevance_score * 0.3) + (informative_score * 0.2) + (length_score * 0.1)
    print(f"positivity score: {positivity_score}")
    print(f"relevance score: {relevance_score}")
    print(f"informative score: {informative_score}")
    print(f"length score: {length_score}")

    return helpfulness_score

In [84]:
review_text = 'this product is amazing! it exceeded my expectations in every way'
product_keywords = ["product", "exceeded expectations", "amazing"]
hepfulness_score = calculate_helpfulness(review_text, product_keywords)
hepfulness_score

positivity score: 0.6239
relevance score: 0.30151134457776363
informative score: 7
length score: 65


8.240013403373329

In [85]:
import json
with open('C:/Hammad Aslam/BS IT (post ADP)/3rd Semester/Capstone Project/Project/backend/datasets/categories/allFiles/computers_laptops.json', 'r') as f:
    data = json.load(f)
print(f"review body: {data[2]['reviews'][1]['review_body']}")
print(f"review topics: {data[2]['reviews'][1]['review_topics']}")
calculate_helpfulness(data[2]['reviews'][1]['review_body'], data[2]['reviews'][1]['review_topics'])

review body: A powerful, well constructed notebook that provides some useful new features.  So far it has performed well and runs my most demanding applications with no problems.  I particularly like the compactness of the 14.1 inch display and the keyboard has an excellent feel to it.  Things I don't like are the poor speaker sound quality and positioning, the highly reflective display, the Vista user interface, and the lack of a true display mechanical latch.  It would have been nice to have had a CF memory card reader.  This product comes from Amazon with a free upgrade to Windows 7, a nice carry case and a useable optical mouse.
review topics: ['14.1 inch display', 'Windows 7', 'carry case', 'optical mouse', 'reflective display', 'vista user interface', 'no CF memory card reader']
positivity score: 0.9589
relevance score: 0.2077929087308457
informative score: 59
length score: 627


74.94589787261926

In [101]:
# Import necessary libraries
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import json

with open('C:/Hammad Aslam/BS IT (post ADP)/3rd Semester/Capstone Project/Project/backend/datasets/categories/allFiles/computers_laptops.json', 'r') as f:
    data = json.load(f)
# Sample review data
review = data[4]['reviews'][0]
review_rating = float(review['review_rating'])
review_text = review['review_body']
topics = review['review_topics']
helpful_votes = int(review['review_votes'])  # Number of helpful votes provided by other customers

# Text analysis using NLTK sentiment analysis
def calculate_text_analysis_score(review_text, topics):
    sia = SentimentIntensityAnalyzer()
    sentiment_scores = sia.polarity_scores(review_text)
    # Adjust sentiment score based on relevance of topics
    topic_relevance = sum([review_text.lower().count(topic.lower()) for topic in topics])
    adjusted_score = sentiment_scores['compound'] + (0.1 * topic_relevance)  # Adjust sentiment score based on topic relevance
    return adjusted_score  # Compound score ranges from -1 (negative) to 1 (positive)

# Define rating weight, text weight, and helpful votes weight coefficients
rating_weight = 0.2
text_weight = 0.4
helpful_votes_weight = 0.4

# Calculate the helpfulness score
text_analysis_score = calculate_text_analysis_score(review_text, topics)
helpfulness_score = (rating_weight * review_rating) + (text_weight * text_analysis_score) + (helpful_votes_weight * helpful_votes)
print(helpfulness_score)

# Normalize the helpfulness score to range from 0 to 1
min_score = min(review_rating, -1) * rating_weight + min(text_analysis_score, -1) * text_weight + min(helpful_votes, 0) * helpful_votes_weight
max_score = max(review_rating, 1) * rating_weight + max(text_analysis_score, 1) * text_weight + max(helpful_votes, 1) * helpful_votes_weight
normalized_score = (helpfulness_score - min_score) / (max_score - min_score)

print("Normalized Helpfulness Score:", normalized_score)


2.3918800000000005
Normalized Helpfulness Score: 0.9349625000000001
