In [15]:
import pandas as pd

# Load the two datasets (replace 'ratings.csv' and 'wine_characteristics.csv' with your actual CSV file paths)
ratings_data = pd.read_csv('Dataset/last/XWines_Slim_150K_ratings.csv')
wine_data = pd.read_csv('Dataset/last/XWines_Slim_1K_wines.csv')

# Merge the two datasets on 'wine_id'
merged_data = pd.merge(ratings_data, wine_data, on='WineID')

# Function to find the best-rated wine or suggest a completely different one if rating < 4
def recommend_wine_for_user(user_id, merged_data):
    # Filter wines rated by the specific user
    user_wines = merged_data[merged_data['UserID'] == user_id]

    if user_wines.empty:
        return f"No wines found for user {user_id}."

    # Find the wine with the highest rating by the user
    best_rated_wine = user_wines.loc[user_wines['Rating'].idxmax()]

    # If the best rating is 4 or higher, return that wine
    if best_rated_wine['Rating'] >= 4:
        return f"Best rated wine for user {user_id}: {best_rated_wine['WineID']} (Rating: {best_rated_wine['Rating']})"

    # If no wine has a rating of 4 or higher, find a completely different wine
    else:
        # Find a wine that is different in terms of 'wine_type', 'wine_body', etc.
        # Define characteristics to consider (adjust based on your dataset)
        characteristics = ['Type', 'Body']

        # Filter out wines that are similar to the one the user rated poorly
        different_wines = merged_data
        for char in characteristics:
            different_wines = different_wines[different_wines[char] != best_rated_wine[char]]

        # If there are still wines left, choose one randomly or based on rating
        if not different_wines.empty:
            recommended_wine = different_wines.sample().iloc[0]  # Sample one random different wine
            return f"User {user_id} rated less than 4. Suggesting different wine: {recommended_wine['WineID']}"

        return f"No sufficiently different wines found for user {user_id}."

# Example usage for a specific user (replace with an actual user_id from your dataset)
user_id = 1006724  # Replace with the actual user_id you want to check
result = recommend_wine_for_user(user_id, merged_data)
print(result)


  ratings_data = pd.read_csv('Dataset/last/XWines_Slim_150K_ratings.csv')


User 1006724 rated less than 4. Suggesting different wine: 193483


In [14]:

# Load the ratings dataset (replace 'ratings.csv' with your actual file path)
ratings_data = pd.read_csv('Dataset/last/XWines_Slim_150K_ratings.csv')

# Group by 'user_id' and find the maximum rating for each user
user_max_ratings = ratings_data.groupby('UserID')['Rating'].max()

# Filter to get only users whose max rating was 3
users_with_max_rating_3 = user_max_ratings[user_max_ratings == 3].index

# Print the list of users whose maximum rating was 3
print("Users whose maximum rating was 3:")
print(users_with_max_rating_3)


Users whose maximum rating was 3:
Index([1006724, 1056257, 1145862, 1196247, 1197650, 1220278, 1245717, 1279250,
       1290651, 1316369, 1334222, 1380716, 1391592, 1393496, 1397551, 1400386,
       1406797, 1420747, 1558571, 1563515, 1664756, 1719976, 1760544, 1762859,
       1781649, 1816088, 1843929, 1936578, 1944444, 1977712, 1997980, 2061195],
      dtype='int64', name='UserID')


  ratings_data = pd.read_csv('Dataset/last/XWines_Slim_150K_ratings.csv')


In [17]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import OneHotEncoder


# Function to find the top-rated wine by a user and recommend similar wines using KNN
def recommend_similar_wines(user_id, merged_data, k=10):
    # Filter the wines rated by the specific user
    user_wines = merged_data[merged_data['UserID'] == user_id]

    if user_wines.empty:
        return f"No wines found for user {user_id}."

    # Find the top-rated wine for this user
    top_rated_wine = user_wines.loc[user_wines['Rating'].idxmax()]

    # Extract relevant features (e.g., 'Type', 'Body') for KNN
    features = ['Type', 'Body']  # Adjust based on your dataset

    # Prepare the dataset for KNN (focus on wine characteristics, exclude 'WineID')
    wine_features = merged_data[features]

    # Apply OneHotEncoding to handle categorical variables like 'Type' and 'Body'
    encoder = OneHotEncoder(sparse_output=False)
    encoded_wine_features = encoder.fit_transform(wine_features)

    # Fit KNN model
    knn_model = NearestNeighbors(n_neighbors=k, metric='euclidean')
    knn_model.fit(encoded_wine_features)

    # Encode the top-rated wine's features (ensure it is passed as a DataFrame with feature names)
    top_rated_wine_features = pd.DataFrame([top_rated_wine[features]], columns=features)
    encoded_top_rated_wine_features = encoder.transform(top_rated_wine_features)

    # Find K nearest wines to the top-rated wine
    distances, indices = knn_model.kneighbors(encoded_top_rated_wine_features)

    # Get the recommended similar wines (excluding the top-rated wine itself)
    recommended_wines = merged_data.iloc[indices[0]]

    # Exclude the top-rated wine itself from the recommendations
    recommended_wines = recommended_wines[recommended_wines['WineID'] != top_rated_wine['WineID']]

    return recommended_wines[['WineID', 'Type', 'Body']]


# Example usage for a specific user (replace with an actual user_id from your dataset)
user_id = 1356810  # Replace with the actual user_id you want to check

# Find the best wine for the user
result = recommend_wine_for_user(user_id, merged_data)
print(result)

# Recommend similar wines based on the top-rated wine
recommended_wines = recommend_similar_wines(user_id, merged_data)

# Display the recommended wines
print(recommended_wines)

Best rated wine for user 1356810: 112084 (Rating: 5.0)
     WineID       Type           Body
25   102547  Sparkling  Medium-bodied
153  116266  Sparkling  Medium-bodied
150  111484  Sparkling  Medium-bodied
64   111927  Sparkling  Medium-bodied
127  111544  Sparkling  Medium-bodied
78   111479  Sparkling  Medium-bodied
37   116266  Sparkling  Medium-bodied
107  111544  Sparkling  Medium-bodied
93   111544  Sparkling  Medium-bodied
