In [7]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split as surprise_train_test_split
from surprise import accuracy

#load dataset with remedy id for each remedy
df = pd.read_csv('modified_remedy_data.csv')

#combine constraints
df['user_id'] = df[['Skin_type', 'Sensitivity', 'Blood_group', 'Hyperpigmentation_type']].apply(lambda x: '_'.join(x), axis=1)

#rating process
np.random.seed(42)
df['rating'] = np.random.randint(1, 6, df.shape[0])  # Random ratings


reader = Reader(rating_scale=(1, 5))  
data = Dataset.load_from_df(df[['user_id', 'Remedy_ID', 'rating']], reader)


#train test split
trainset, testset = surprise_train_test_split(data, test_size=0.25)

# User-based Collaborative Filtering (UBCF) using KNNBasic
sim_options_user_based = {
    'name': 'cosine',  # Cosine similarity
    'user_based': True  # True for user-based filtering
}

algo_user_based = KNNBasic(sim_options=sim_options_user_based)
algo_user_based.fit(trainset)

# Predict and evaluate
predictions_user_based = algo_user_based.test(testset)
rmse_user_based = accuracy.rmse(predictions_user_based, verbose=False)

# Output the RMSE for UBCF
print(f"User-based CF RMSE: {rmse_user_based}")

# Function to predict remedy ratings for all remedies based on user input
def predict_remedy_ratings(skin_type, sensitivity, blood_group, hyperpigmentation_type):
    user_id = f"{skin_type}_{sensitivity}_{blood_group}_{hyperpigmentation_type}"
    predictions = []
    remedy_ids = df['Remedy_ID'].unique()
    for remedy_id in remedy_ids:
        pred = algo_user_based.predict(user_id, remedy_id)
        predictions.append((remedy_id, pred.est))
    
    # Sort predictions based on estimated ratings
    predictions.sort(key=lambda x: x[1], reverse=True)
    print(predictions)
    
    # Return the remedy with the highest predicted rating
    return predictions[0] if predictions else None

# Get custom user input
skin_type = input("Enter your skin type: ")
sensitivity = input("Enter your sensitivity level: ")
blood_group = input("Enter your blood group: ")
hyperpigmentation_type = input("Enter your hyperpigmentation type: ")

# Predict and display the best remedy
best_remedy = predict_remedy_ratings(skin_type, sensitivity, blood_group, hyperpigmentation_type)

if best_remedy:
    remedy_id, predicted_rating = best_remedy
    remedy_details = df[df['Remedy_ID'] == remedy_id].iloc[0]  # Get details of the recommended remedy
    
    # Print the remedy details along with predicted rating
    print(f"Recommended remedy: {remedy_details['Remedy']} (ID: {remedy_id})")
    # print(f"Description: {remedy_details['Description']}")
    print(f"Predicted rating: {predicted_rating:.2f}")
else:
    print("No predictions could be made.")


Computing the cosine similarity matrix...
Done computing similarity matrix.
User-based CF RMSE: 1.4352958605787944


Enter your skin type:  Dry
Enter your sensitivity level:  
Enter your blood group:  A-
Enter your hyperpigmentation type:  Sun_induced


[(1, 2.9140625), (2, 2.9140625), (3, 2.9140625), (4, 2.9140625), (5, 2.9140625), (6, 2.9140625), (7, 2.9140625), (8, 2.9140625), (9, 2.9140625), (10, 2.9140625), (11, 2.9140625), (12, 2.9140625), (13, 2.9140625), (14, 2.9140625), (15, 2.9140625), (16, 2.9140625), (17, 2.9140625), (18, 2.9140625), (19, 2.9140625), (20, 2.9140625), (21, 2.9140625), (22, 2.9140625), (23, 2.9140625), (24, 2.9140625), (25, 2.9140625), (26, 2.9140625), (27, 2.9140625), (28, 2.9140625), (29, 2.9140625), (30, 2.9140625), (31, 2.9140625), (32, 2.9140625), (33, 2.9140625), (34, 2.9140625), (35, 2.9140625), (36, 2.9140625), (37, 2.9140625), (38, 2.9140625), (39, 2.9140625), (40, 2.9140625), (41, 2.9140625), (42, 2.9140625), (43, 2.9140625), (44, 2.9140625), (45, 2.9140625), (46, 2.9140625), (47, 2.9140625), (48, 2.9140625), (49, 2.9140625), (50, 2.9140625), (51, 2.9140625), (52, 2.9140625), (53, 2.9140625), (54, 2.9140625), (55, 2.9140625), (56, 2.9140625), (57, 2.9140625), (58, 2.9140625), (59, 2.9140625), (60, 