<a href="https://colab.research.google.com/github/VellummyilumVinoth/Aspect_based_sentimental_analysis/blob/main/content_based_filtering_for_a_new_user.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
# Step 1: Load the dataset
import pandas as pd
dataset = pd.read_csv('/content/drive/MyDrive/zidan/aspect_extracted_file_info.csv')

In [25]:
dataset

Unnamed: 0,Product_name,Review,Overall Sentiment Score,reviewer_name,age,gender,area
0,"Salate Cordless Electric Spin Scrubber, Electr...",I absolutely love this wireless electric spin ...,0,Taylor Saini,37,Female,Western
1,"Salate Cordless Electric Spin Scrubber, Electr...",i used this to clean my bathroom and this work...,1,Brown Emily,64,Male,Southern
2,"Salate Cordless Electric Spin Scrubber, Electr...",This electric brush makes cleaning so much eas...,2,Jones Michael,24,Female,Western
3,"Salate Cordless Electric Spin Scrubber, Electr...",This Electric spin scrubber is perfect! Use it...,0,Smith Matthew,44,Male,North Western
4,"Salate Cordless Electric Spin Scrubber, Electr...",I don’t like cleaning but this machine help me...,0,Maryam Sophia,35,Female,Southern
...,...,...,...,...,...,...,...
11052,2 Pack Pumice Stone for Toilet Cleaning with E...,Work great,0,Soosai David,51,Female,Eastern
11053,2 Pack Pumice Stone for Toilet Cleaning with E...,The tool works fairly well for removing hard s...,4,Brown Olivia,75,Male,North Central
11054,2 Pack Pumice Stone for Toilet Cleaning with E...,Long handle so you don’t have to put your hand...,1,Christopher Zidan,67,Female,North Western
11055,2 Pack Pumice Stone for Toilet Cleaning with E...,It works very nicely on these Arizona hard wat...,1,Vithujan John,38,Female,Uva


## Content based filtering for new user using age, gender and area

In [30]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Extract the features
reviews = dataset["Review"]
product_names = dataset["Product_name"]
overall_sentiment_score = dataset["Overall Sentiment Score"]
reviewer_name = dataset["reviewer_name"]
age = dataset["age"]
gender = dataset["gender"]
area = dataset["area"]

# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Transform the reviews into TF-IDF vectors
review_vectors = vectorizer.fit_transform(reviews)

# Calculate the cosine similarity between all pairs of products
product_similarity = cosine_similarity(review_vectors)

# Create a function to recommend products to a user
def recommend_products(user_age, user_gender, user_area):
    # Validate age input
    if user_age <= 18 or user_age >= 75:
        raise ValueError("Age should be between 18 and 75.")
        
    # Validate gender input
    valid_genders = ["Male", "Female", "Other"]
    if user_gender not in valid_genders:
        raise ValueError("Invalid gender. Please enter 'Male', 'Female', or 'Other'.")
        
    # Validate area input
    valid_areas = ["Central", "Eastern", "Northern", "North Central", "North Western", "Sabaragamuwa", "Southern", "Uva", "Western"]
    if user_area not in valid_areas:
        raise ValueError("Invalid area. Please enter a valid province name.")

    # Filter products that match the user's age, gender, and area
    filtered_indices = np.where((age == user_age) & (gender == user_gender) & (area == user_area))[0]

    # Check if there are no matching products
    if len(filtered_indices) == 0:
        raise ValueError("No products found for the specified criteria.")

    # Calculate the similarity scores for the filtered products
    similarity_scores = np.mean(product_similarity[filtered_indices], axis=0)

    # Handle NaN values in similarity_scores
    similarity_scores = np.nan_to_num(similarity_scores)

    # Sort the products by their similarity to the user
    similar_products = np.argsort(similarity_scores)[::-1]

    # Initialize set to keep track of recommended product names
    recommended_set = set()

    # Return the top 10 unique products with their scores
    recommendations = []
    for rank, product in enumerate(similar_products):
        if len(recommendations) >= 10:
            break
        product_name = product_names[product]
        if product_name not in recommended_set:
            score = similarity_scores[product]
            recommendations.append((rank + 1, product_name, score))
            recommended_set.add(product_name)
    return recommendations

# Recommend products to a user
user_age = 37
user_gender = "Female"
user_area = "Western"

try:
    recommended_products = recommend_products(user_age, user_gender, user_area)
    # Print the recommended products with rank numbers and scores
    for rank, product, score in recommended_products:
        print(f"Rank {rank}: {product}")
except ValueError as e:
    print("Error:", str(e))

Rank 1: LiBa Electric Bug Zapper, Indoor Insect Killer - (2) Extra Replacement Bulbs - Fly, Mosquito Killer and Repellent - Lightweight, Powerful 2800V Grid, Easy-to-Clean, with a Removable Washable Tray.
Rank 2: Flowtron BK-80D 80-Watt Electronic Insect Killer, 1-1/2 Acre Coverage , Black
Rank 3: [2 Pack] Pumice Stone for Toilet Cleaning, Pumice Cleaning Stone Toilet Bowl with Extra Long Handle for Removing Toilet Bowl Ring, Pool, Bathroom,Toilet Brush, Tiles & BBQ Grills
Rank 4: Leather Honey Leather Conditioner, Best Leather Conditioner Since 1968. for Use on Leather Apparel, Furniture, Auto Interiors, Shoes, Bags and Accessories. Non-Toxic and Made in The USA!…
Rank 5: Swedish Wholesale Swedish Dish Cloths for Kitchen- 10 Pack Reusable Paper Towels for Counters & Dishes - Eco Friendly Cellulose Sponge Cloth - Assorted
Rank 6: Kleenex Expressions Disposable Paper Hand Towels, Paper Hand Towels for Bathroom, 6 Boxes, 60 Hand Towels per Box (360 Total Tissues)
Rank 7: Panasonic BK-3MC

In [27]:
dataset

Unnamed: 0,Product_name,Review,Overall Sentiment Score,reviewer_name,age,gender,area
0,"Salate Cordless Electric Spin Scrubber, Electr...",I absolutely love this wireless electric spin ...,0,Taylor Saini,37,Female,Western
1,"Salate Cordless Electric Spin Scrubber, Electr...",i used this to clean my bathroom and this work...,1,Brown Emily,64,Male,Southern
2,"Salate Cordless Electric Spin Scrubber, Electr...",This electric brush makes cleaning so much eas...,2,Jones Michael,24,Female,Western
3,"Salate Cordless Electric Spin Scrubber, Electr...",This Electric spin scrubber is perfect! Use it...,0,Smith Matthew,44,Male,North Western
4,"Salate Cordless Electric Spin Scrubber, Electr...",I don’t like cleaning but this machine help me...,0,Maryam Sophia,35,Female,Southern
...,...,...,...,...,...,...,...
11052,2 Pack Pumice Stone for Toilet Cleaning with E...,Work great,0,Soosai David,51,Female,Eastern
11053,2 Pack Pumice Stone for Toilet Cleaning with E...,The tool works fairly well for removing hard s...,4,Brown Olivia,75,Male,North Central
11054,2 Pack Pumice Stone for Toilet Cleaning with E...,Long handle so you don’t have to put your hand...,1,Christopher Zidan,67,Female,North Western
11055,2 Pack Pumice Stone for Toilet Cleaning with E...,It works very nicely on these Arizona hard wat...,1,Vithujan John,38,Female,Uva


## Collaborative filtering for old user using their sentimental score for the reviews by them

In [28]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Filter the data for the old user
old_user = 'Johnson Sophia'
old_user_data = dataset[dataset['reviewer_name'] == old_user]

# Filter the data for products with sentiment score less than 2
filtered_data = dataset[dataset['Overall Sentiment Score'] < 2].copy()

# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Apply TF-IDF vectorization to the review text
review_vectors = vectorizer.fit_transform(filtered_data['Review'])

# Calculate cosine similarity between the old user's reviews and the filtered products
cosine_similarities = cosine_similarity(vectorizer.transform(old_user_data['Review']), review_vectors)

# Get the average sentiment score for each product
filtered_data['Average_Sentiment'] = filtered_data.groupby('Product_name')['Overall Sentiment Score'].transform('mean')

# Combine the average sentiment scores with the cosine similarities
filtered_data['Score'] = cosine_similarities[0] * filtered_data['Average_Sentiment']

# Sort the data based on the score
sorted_data = filtered_data.sort_values('Score', ascending=False)

# Add ranking numbers to the sorted data
sorted_data['Rank'] = range(1, len(sorted_data) + 1)

# Get the top 10 recommended products with ranking numbers
top_10_products = sorted_data[['Rank', 'Product_name', 'Score']].head(10)

print("Top 10 recommended products for", old_user, "with their past reviews:")
print(top_10_products)


Top 10 recommended products for Johnson Sophia with their past reviews:
      Rank                                       Product_name     Score
32       1  Dixie Paper Plates, 8 1/2 inch, Dinner Size Pr...  0.285714
2900     2  Amazon Basics 20-Pack AAA Alkaline Batteries, ...  0.212456
8949     3  400 Pack Extra Heavyweight Disposable White Pl...  0.170558
5298     4  [300 Pack] Disposable White Uncoated Paper Pla...  0.167279
7768     5  Henreepow Ni-MH AA Rechargeable Batteries, Dou...  0.123200
7412     6  Dixie PerfecTouch WiseSize Coffee Design Insul...  0.123011
4844     7  100% Compostable 9 Inch Heavy-Duty Paper Plate...  0.118775
308      8  Amazon Basics Sandwich Storage Bags, 300 Count...  0.117583
3478     9  100% Compostable 7 Inch Paper Plates [125-Pack...  0.116257
1600    10  100% Compostable 9 Inch Heavy-Duty [125-Pack] ...  0.109026


In [29]:
john_data = dataset[dataset['reviewer_name'] == 'Johnson Sophia']

# Print the data for reviewer name "Johnson Sophia"
john_data

Unnamed: 0,Product_name,Review,Overall Sentiment Score,reviewer_name,age,gender,area
32,"Dixie Paper Plates, 8 1/2 inch, Dinner Size Pr...",I liked these plates. They aren't super thick ...,1,Johnson Sophia,43,Male,Eastern
45,"Kleenex Expressions Ultra Soft Facial Tissues,...",Just like the convenience of buying several bo...,1,Johnson Sophia,43,Male,Eastern
280,Swiffer Sweeper Wet Mopping Cloth Multi Surfac...,Love these wet ones. They are lavender scented...,0,Johnson Sophia,43,Male,Eastern
547,"Angel Soft Toilet Paper, 48 Mega Rolls = 192 R...",This is one of those types of products that yo...,0,Johnson Sophia,43,Male,Eastern
565,"HOMEXCEL Microfiber Cleaning Cloth,12 Pack Cle...",This area is in a severe drought but still has...,4,Johnson Sophia,43,Male,Eastern
1003,"ACDelco 48-Count AAA Batteries, Maximum Power ...",My battery hunting days are over. When you got...,4,Johnson Sophia,43,Male,Eastern
1496,GLAD ForceFlexPlus Large Drawstring Trash Bags...,Good strength and fits the bill.,0,Johnson Sophia,43,Male,Eastern
2391,"Brawny® Tear-A-Square® Paper Towels, 6 Triple ...",Love this brand ! .,0,Johnson Sophia,43,Male,Eastern
2634,Energizer Alkaline Power C Batteries (12 Pack)...,I actually order wrong ones. Its all good.,2,Johnson Sophia,43,Male,Eastern
2787,Cottonelle Ultra Clean Toilet Paper with Activ...,Cottonelle Ultra Clean Toilet Paper with Activ...,0,Johnson Sophia,43,Male,Eastern
