<a href="https://colab.research.google.com/github/VellummyilumVinoth/Aspect_based_sentimental_analysis/blob/main/recommendation_system_for_old_and_new_users.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Attach Google Drive to the Colab runtime so the dataset stored there can be read.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [13]:
# Step 1: Load the dataset
import pandas as pd

# Location of the aspect-extracted review CSV on the mounted Google Drive.
DATASET_PATH = '/content/drive/MyDrive/zidan/aspect_extracted_file_info.csv'

dataset = pd.read_csv(DATASET_PATH)

In [14]:
# Display the loaded DataFrame to inspect its columns and a sample of its rows.
dataset

Unnamed: 0,Product_name,Review,Overall Sentiment Score,reviewer_name,age,gender,area
0,"Sunlight Care Detergent Liquid, 1L",I purchased 2 bottles. Both seem partially use...,3,Williams Thinesh,58,Female,Western
1,"Sunlight Care Detergent Liquid, 1L",Very fast & safe delivery . Neet packing. I wi...,0,Anderson John,46,Male,Southern
2,"Sunlight Care Detergent Liquid, 1L",Fast delivery during this pandemic. Got the pr...,0,Miller Sophia,38,Female,Western
3,"Sunlight Care Detergent Liquid, 1L",Alot of liquid has leaked.im very much disappo...,0,Maryam Michael,19,Male,North Central
4,"Sunlight Care Detergent Liquid, 1L",Thank you very much !! I received my package t...,0,Johnson Emily,66,Female,North Western
...,...,...,...,...,...,...,...
1837,Sustagen Nutritional Supplement Chocolate Flav...,very good product value for the price,0,Smith Jane,43,Male,Sabaragamuwa
1838,Sustagen Nutritional Supplement Chocolate Flav...,Delivery within 1day to Tangalle... well packe...,0,Kumar Saini,32,Female,Central
1839,Sustagen Nutritional Supplement Chocolate Flav...,high quality reasonable price,2,Soosai Emily,51,Male,Southern
1840,Sustagen Nutritional Supplement Chocolate Flav...,Excellent packaging. And it was delivered quic...,0,Bathri Daniel,25,Male,North Western


## Content-based filtering for new users using age, gender, and area

In [15]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Pull each column of interest out of the dataset as a module-level Series.
(
    reviews,
    product_names,
    overall_sentiment_score,
    reviewer_name,
    age,
    gender,
    area,
) = (
    dataset[column]
    for column in (
        "Review",
        "Product_name",
        "Overall Sentiment Score",
        "reviewer_name",
        "age",
        "gender",
        "area",
    )
)

# Fit a TF-IDF model on the review text; each row of review_vectors is one review.
vectorizer = TfidfVectorizer()
review_vectors = vectorizer.fit_transform(reviews)

# Pairwise cosine similarity between review vectors (row i vs. row j). Despite
# the variable name, entries compare individual reviews, not aggregated products.
product_similarity = cosine_similarity(review_vectors)

# Create a function to recommend products to a user
def recommend_products(user_age, user_gender, user_area, top_n=10):
    """Recommend products for a new user based on demographically matching reviewers.

    Reviews whose reviewer has exactly the given age, gender and area are
    selected. Every review in the dataset is then scored by its mean cosine
    similarity to that selection, and the best-scoring distinct product names
    are returned.

    Reads the module-level ``age``, ``gender``, ``area``, ``product_names`` and
    ``product_similarity`` objects defined at notebook scope.

    Args:
        user_age: Age of the user; must be between 18 and 75 inclusive.
        user_gender: Either "Male" or "Female".
        user_area: One of the province names listed in ``valid_areas`` below.
        top_n: Maximum number of distinct products to return (default 10).

    Returns:
        A list of ``(rank, product_name, score)`` tuples ordered best first.
        ``rank`` counts distinct products starting at 1, so the ranks are
        always consecutive.

    Raises:
        ValueError: If an argument fails validation, or if no review matches
            the requested age/gender/area combination.
    """
    # Validate age input; both bounds are accepted so the check agrees with the message.
    if user_age < 18 or user_age > 75:
        raise ValueError("Age should be between 18 and 75.")

    # Validate gender input; the message lists exactly the accepted values.
    valid_genders = ("Male", "Female")
    if user_gender not in valid_genders:
        raise ValueError("Invalid gender. Please enter 'Male' or 'Female'.")

    # Validate area input
    valid_areas = (
        "Central",
        "Eastern",
        "Northern",
        "North Central",
        "North Western",
        "Sabaragamuwa",
        "Southern",
        "Uva",
        "Western",
    )
    if user_area not in valid_areas:
        raise ValueError("Invalid area. Please enter a valid province name.")

    # Positions of the reviews written by reviewers with the same profile.
    matches = (age == user_age) & (gender == user_gender) & (area == user_area)
    filtered_indices = np.where(matches)[0]

    # Check if there are no matching reviews
    if len(filtered_indices) == 0:
        raise ValueError("No products found for the specified criteria.")

    # Mean similarity of every review to the matching reviews; NaNs become 0.
    similarity_scores = np.nan_to_num(
        np.mean(product_similarity[filtered_indices], axis=0)
    )

    # Review positions ordered from most to least similar.
    similar_products = np.argsort(similarity_scores)[::-1]

    recommendations = []
    recommended_set = set()
    for position in similar_products:
        if len(recommendations) >= top_n:
            break
        # argsort yields positions, so look the name up positionally rather
        # than by index label.
        product_name = product_names.iloc[position]
        if product_name in recommended_set:
            continue
        recommended_set.add(product_name)
        # Rank among distinct products, not the position in the full review
        # ordering (which skips numbers whenever a product repeats).
        recommendations.append(
            (len(recommendations) + 1, product_name, similarity_scores[position])
        )
    return recommendations

# Example query: a 32-year-old woman from the Central province.
user_age, user_gender, user_area = 32, "Female", "Central"

try:
    recommended_products = recommend_products(user_age, user_gender, user_area)
    # Only the rank and product name are shown; the similarity score is not printed.
    for rank, product, _score in recommended_products:
        print(f"Rank {rank}: {product}")
except ValueError as err:
    # Validation failures and empty matches are reported instead of raised.
    print("Error:", str(err))

Rank 1: Wijaya Coffee 50g
Rank 2: Lipton Ceylonta Black Tea Pouch, 200g
Rank 3: Kinder Bueno 3 Pack 129g
Rank 4: Wijaya Noodles 500g
Rank 5: Maliban Lemon Puff 200g
Rank 6: Mixed Nuts (Almond,Walnuts,Pistachio,Cashew, & Cranberry) 200g
Rank 7: Harischandra Plain Noodles 400G
Rank 8: Chinese Noodles 500g
Rank 9: Diva Rose Fresh Laundry Detergent Soap 115G
Rank 10: Roasted Ground Coffee Cafe Classic - Pure Soluble 250g Made In Italy By Italian Mart


## Collaborative filtering for existing users using the sentiment scores of their reviews

In [17]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Select the existing ("old") user and every review they have written
old_user = 'Johnson Sophia'
old_user_data = dataset[dataset['reviewer_name'] == old_user]

# Without at least one review there is nothing to compare against; fail with a
# clear message instead of an opaque error from the similarity computation.
if old_user_data.empty:
    raise ValueError(f"No reviews found for user '{old_user}'.")

# Keep only reviews with a sentiment score below 2 (copied so that new columns
# can be added without touching `dataset`)
filtered_data = dataset[dataset['Overall Sentiment Score'] < 2].copy()

# Fit a TF-IDF model on the candidate reviews
vectorizer = TfidfVectorizer()
review_vectors = vectorizer.fit_transform(filtered_data['Review'])

# Similarity matrix: one row per review by the user, one column per candidate review
cosine_similarities = cosine_similarity(vectorizer.transform(old_user_data['Review']), review_vectors)

# Average over all of the user's reviews so that every review contributes,
# not just the first one. With a single review this equals that review's row.
user_similarity = cosine_similarities.mean(axis=0)

# Average sentiment score of each product, broadcast back onto its reviews
filtered_data['Average_Sentiment'] = filtered_data.groupby('Product_name')['Overall Sentiment Score'].transform('mean')

# Combine the average sentiment scores with the similarities.
# NOTE(review): a product whose average score is 0 gets a combined score of 0
# regardless of similarity - confirm this weighting is intended.
filtered_data['Score'] = filtered_data['Average_Sentiment'] * user_similarity

# Sort best first and keep only the best-scoring row per product, so that the
# ranking lists each product once.
sorted_data = (
    filtered_data
    .sort_values('Score', ascending=False)
    .drop_duplicates(subset='Product_name', keep='first')
)

# Add ranking numbers to the sorted data
sorted_data['Rank'] = range(1, len(sorted_data) + 1)

# Get the top 10 recommended products with ranking numbers
top_10_products = sorted_data[['Rank', 'Product_name', 'Score']].head(10)

print("Top 10 recommended products for", old_user, "with their past reviews:")
print(top_10_products)


Top 10 recommended products for Johnson Sophia with their past reviews:
      Rank                                       Product_name     Score
1200     1             Aachi Pani Puri Kit (Paani Poori) 155g  0.265167
1795     2                Diamond Full Cream Milk Powder 400g  0.260293
731      3              Lipton Ceylonta Black Tea Pouch, 200g  0.235754
615      4                  Knorr Maldivefish Powder Mix, 40g  0.225550
1621     5                                  Wijaya Coffee 50g  0.212773
1260     6                    Sun Crush Sparkling Guava 300ml  0.208550
511      7                        Wijaya White Rice Flour 1Kg  0.206240
1047     8  Pink Pie Strawberry Flavour Mini Snack Bisuit ...  0.197449
243      9     Lifebuoy Total 10 Handwash Refill Pouch, 180ml  0.184877
201     10  Dettol Antibacterial Surface Disinfectant Spra...  0.181818
