# Project

This notebook contains the generation of the top 3 of the best and the worst products

## Imports

In [2]:
import pandas as pd

## Load Dataset

In [3]:
# load dataset and have a quick look on the data we have
data = pd.read_csv('lists/reviews_with_predicted_sentiment_category.csv', sep=',', low_memory=False)
data.head()

Unnamed: 0,name,reviews.rating,reviews.text,reviews.title,predicted_sentiment,predicted_categorie
0,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",5,This product so far has not disappointed. My c...,Kindle,positive,Amazon Fire Tablet
1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",5,great for beginner or experienced person. Boug...,very fast,positive,Amazon Fire Tablet
2,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",5,Inexpensive tablet for him to use and learn on...,Beginner tablet for our 9 year old son.,positive,Amazon Fire Tablet
3,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",4,I've had my Fire HD 8 two weeks now and I love...,Good!!!,positive,Amazon Fire Tablet
4,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",5,I bought this for my grand daughter when she c...,Fantastic Tablet for kids,positive,Amazon Fire Tablet


## Helper methods

In [4]:
def get_best_worst_products(group):
    """
    Get the top 3 and bottom 3 products from a group.

    Args:
        group (pandas.DataFrame): A DataFrame containing product data, including 'average_score' column.

    Returns:
        pandas.DataFrame: A DataFrame containing the top 3 and bottom 3 products with a 'label' column indicating 'best' or 'worst'.
    """

    # Sort by weighted score in descending order
    group_sorted = group.sort_values('average_score', ascending=False)

    # Select top 3 and bottom 3, ensuring no overlap
    top_3 = group_sorted.head(3).copy()
    top_3['label'] = 'best'

    bottom_3 = group_sorted.tail(3).copy()
    bottom_3['label'] = 'worst'

    # Concatenate and return
    return pd.concat([top_3, bottom_3])

In [5]:
def sentiment_score(sentiment):
    """
    Converts a sentiment category to a numerical score.

    Args:
        sentiment (str): The sentiment category ('positive', 'neutral', or 'negative').

    Returns:
        int: A numerical score representing the sentiment (1 for positive, 2 for neutral, 3 for negative).
    """

    return {'positive': 1, 'neutral': 2, 'negative': 3}[sentiment]

## Create list

In [6]:
# Apply the sentiment score to the DataFrame
data['sentiment_score'] = data['predicted_sentiment'].apply(sentiment_score)

# Step 2: Calculate a weighted score by combining rating and sentiment score
data['weighted_score'] = data['reviews.rating'] * data['sentiment_score']

# Step 3: Group by 'name' and 'predicted_categorie' to calculate the average weighted score
# Count the number of reviews per product to calculate the average
product_scores = data.groupby(['name', 'predicted_categorie']).agg(
    total_weighted_score=('weighted_score', 'sum'),
    num_reviews=('weighted_score', 'count')
).reset_index()

# Calculate the average weighted score per product
product_scores['average_score'] = product_scores['total_weighted_score'] / product_scores['num_reviews']

# Now get the top 3 of the best and worst products
best_worst_products = product_scores.groupby('predicted_categorie').apply(get_best_worst_products).reset_index(drop=True)

best_worst_products

  best_worst_products = product_scores.groupby('predicted_categorie').apply(get_best_worst_products).reset_index(drop=True)


Unnamed: 0,name,predicted_categorie,total_weighted_score,num_reviews,average_score,label
0,"Amazon Fire Tv Kindle Dx Leather Cover, Black ...",Amazon Echo White,5,1,5.0,best
1,Echo (Black) Echo (Black),Amazon Echo White,15,3,5.0,best
2,Kindle Oasis E-reader with Leather Charging Co...,Amazon Echo White,30,6,5.0,best
3,Kindle Oasis E-reader with Leather Charging Co...,Amazon Echo White,329,67,4.910448,worst
4,Echo (White) Echo (White),Amazon Echo White,16031,3309,4.844666,worst
5,"Kindle Dx Leather Cover, Black (fits 9.7 Displ...",Amazon Echo White,43,9,4.777778,worst
6,"Amazon Fire Kids Edition Tablet, 7 Display, Wi...",Amazon Fire Kids Edition,29,6,4.833333,best
7,"Fire Kids Edition Tablet, 7 Display, Wi-Fi, 16...",Amazon Fire Kids Edition,8114,1685,4.81543,best
8,Brand New Amazon Kindle Fire 16gb 7 Ips Displa...,Amazon Fire Kids Edition,4964,1033,4.805421,best
9,"Amazon Fire Kids Edition Tablet, 7 Display, Wi...",Amazon Fire Kids Edition,29,6,4.833333,worst


In [7]:
# save dataset in a csv file for a later usage
best_worst_products.to_csv('lists/reviews_top_3.csv', index=False)