<a href="https://colab.research.google.com/github/VellummyilumVinoth/Aspect_based_sentimental_analysis/blob/main/recommendation_system_for_old_and_new_users.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Step 1: Load the dataset
import pandas as pd
dataset = pd.read_csv('/content/drive/MyDrive/zidan/aspect_extracted_file_info.csv')

In [3]:
dataset

Unnamed: 0,Product_name,Category,Review,Overall Sentiment Score,reviewer_name,age,gender,area
0,"Sunlight Care Detergent Liquid, 1L",Laundry,I purchased 2 bottles. Both seem partially use...,3,Davis Zidan,56,Female,Eastern
1,"Sunlight Care Detergent Liquid, 1L",Laundry,Very fast & safe delivery . Neet packing. I wi...,0,Christopher John,26,Male,North Western
2,"Sunlight Care Detergent Liquid, 1L",Laundry,Fast delivery during this pandemic. Got the pr...,0,Wilson Olivia,45,Male,Eastern
3,"Sunlight Care Detergent Liquid, 1L",Laundry,Alot of liquid has leaked.im very much disappo...,0,Johnson Daniel,67,Female,Southern
4,"Sunlight Care Detergent Liquid, 1L",Laundry,Thank you very much !! I received my package t...,0,Anderson David,30,Male,North Western
...,...,...,...,...,...,...,...,...
1837,Sustagen Nutritional Supplement Chocolate Flav...,MilkPowder,very good product value for the price,0,Christopher Emma,29,Female,North Central
1838,Sustagen Nutritional Supplement Chocolate Flav...,MilkPowder,Delivery within 1day to Tangalle... well packe...,0,Maryam Emma,39,Female,Sabaragamuwa
1839,Sustagen Nutritional Supplement Chocolate Flav...,MilkPowder,high quality reasonable price,2,Davis Emily,72,Female,Sabaragamuwa
1840,Sustagen Nutritional Supplement Chocolate Flav...,MilkPowder,Excellent packaging. And it was delivered quic...,0,Anderson Saini,20,Female,North Western


## Content based filtering for new user using age, gender and area

In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

reviews = dataset["Review"]
product_names = dataset["Product_name"]
overall_sentiment_score = dataset["Overall Sentiment Score"]
reviewer_name = dataset["reviewer_name"]
age = dataset["age"]
gender = dataset["gender"]
area = dataset["area"]

# Combine relevant features into a single DataFrame
features = pd.DataFrame({'Review': reviews, 'Product_name': product_names, 'Age': age, 'Gender': gender, 'Area': area})

# Create a TF-IDF vectorizer to convert text into numerical features
tfidf = TfidfVectorizer(stop_words='english')

# Apply TF-IDF vectorization to the 'Review' column
review_matrix = tfidf.fit_transform(features['Review'])

# Compute the cosine similarity matrix between review texts
cosine_similarities = linear_kernel(review_matrix, review_matrix)

# Define a function to get top recommendations based on demographic similarity
def get_recommendations(user_age, user_gender, user_area, top_n=10):
    # Find the indices of users with similar demographics
    similar_users = features[(features['Age'] == user_age) & (features['Gender'] == user_gender) & (features['Area'] == user_area)].index

    # Calculate the average sentiment score of similar users
    avg_sentiment_score = overall_sentiment_score[similar_users].mean()

    # Calculate the weighted average of cosine similarities with similar users
    weighted_similarities = cosine_similarities[similar_users].mean(axis=0)

    # Sort the indices based on weighted similarity scores
    top_indices = weighted_similarities.argsort()[::-1][:top_n]

    # Get the product names of the top recommendations
    top_recommendations = product_names[top_indices]

    return top_recommendations, avg_sentiment_score

# Example usage:
user_age = 45
user_gender = 'Male'
user_area = 'Eastern'

recommendations, avg_sentiment = get_recommendations(user_age, user_gender, user_area, top_n=10)

print("Recommended products:")
for i, product in enumerate(recommendations, 1):
    print(f"Rank {i}: {product}")

print("Average sentiment score of similar users:", avg_sentiment)


Recommended products:
Rank 1: Wijaya Coffee 50g
Rank 2: Green Tea 25 Tea Bags Carton
Rank 3: Sera Soup Vegetable 50g
Rank 4: Diva Rose Fresh Laundry Detergent Soap 115G
Rank 5: Harpic Toilet Cleaner Liquid with Power Plus 10/10 Stain Removal 750ml Original
Rank 6: Munchee Cream Cracker 490G
Rank 7: Munchee Milk Short Cake Biscuits 200G
Rank 8: Cherish Nice Biscuit - 480g
Rank 9: SMAK Woodapple Nectar 01ltr
Rank 10: Laojee Pure Ceylon Black Tea Pouch, 200g
Average sentiment score of similar users: 1.1666666666666667


## Collaborative filtering for old user using their sentimental score for the reviews by them

In [11]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Filter the data for the old user
old_user = 'Johnson Sophia'
old_user_data = dataset[dataset['reviewer_name'] == old_user]

# Filter the data for products with sentiment score less than 2
filtered_data = dataset[dataset['Overall Sentiment Score'] < 2].copy()

# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Apply TF-IDF vectorization to the review text
review_vectors = vectorizer.fit_transform(filtered_data['Review'])

# Calculate cosine similarity between the old user's reviews and the filtered products
cosine_similarities = cosine_similarity(vectorizer.transform(old_user_data['Review']), review_vectors)

# Get the average sentiment score for each product
filtered_data['Average_Sentiment'] = filtered_data.groupby('Product_name')['Overall Sentiment Score'].transform('mean')

# Combine the average sentiment scores with the cosine similarities
filtered_data['Score'] = cosine_similarities[0] * filtered_data['Average_Sentiment']

# Sort the data based on the score
sorted_data = filtered_data.sort_values('Score', ascending=False)

# Add ranking numbers to the sorted data
sorted_data['Rank'] = range(1, len(sorted_data) + 1)

# Get the top 10 recommended products with ranking numbers
top_10_products = sorted_data[['Rank', 'Product_name', 'Score']].head(10)

print("Top 10 recommended products for", old_user, "with their past reviews:")
print(top_10_products)


Top 10 recommended products for Johnson Sophia with their past reviews:
      Rank                                       Product_name     Score
94       1      Sunlight Yellow Detegent Soap Multipack, 330g  0.333333
1266     2                    Sun Crush Sparkling Guava 300ml  0.264737
485      3      Extra Virgin Olive Oil 1L DANTE Made in Italy  0.234234
175      4                         Vim Dishwash Liquid, 250ml  0.217098
1184     5                            Sera Soup Vegetable 50g  0.203479
171      6                         Vim Dishwash Liquid, 250ml  0.198865
1538     7  LAVAZZA Crema E Gusto Classico Ground Coffee 2...  0.198854
1247     8                        SMAK Woodapple Nectar 01ltr  0.192536
1272     9                  MD Orange Nectar 1 Ltr Pet Bottle  0.192026
1242    10                        SMAK Woodapple Nectar 01ltr  0.184728
