In [55]:
import pandas as pd

ratings_data = pd.read_csv('Dataset/last/XWines_Slim_150K_ratings.csv')
wine_data = pd.read_csv('Dataset/last/XWines_Slim_1K_wines.csv')
group_data = pd.read_csv('Dataset/last/group_composition.csv')  

# merge the two datasets on 'WineID'
merged_data = pd.merge(ratings_data, wine_data, on='WineID')

# function to find the best-rated wine or suggest a completely different one if rating < 4
def recommend_wine_for_user(user_id, merged_data):
    user_wines = merged_data[merged_data['UserID'] == user_id]

    if user_wines.empty:
        return f"No wines found for user {user_id}.", None

    best_rated_wine = user_wines.loc[user_wines['Rating'].idxmax()]

    # if the best rating is 4 or higher, return that wine
    if best_rated_wine['Rating'] >= 4:
        return best_rated_wine['WineID'], best_rated_wine['Rating']

    # if no wine has a rating of 4 or higher, find a completely different wine
    else:
        
        characteristics = ['Type', 'Body']

        # filter out wines that are similar to the one the user rated poorly
        different_wines = merged_data
        for char in characteristics:
            different_wines = different_wines[different_wines[char] != best_rated_wine[char]]

        # if there are still wines left, choose one randomly or based on rating
        if not different_wines.empty:
            recommended_wine = different_wines.sample().iloc[0]  # Sample one random different wine
            return recommended_wine['WineID'], None

        return f"No sufficiently different wines found for user {user_id}.", None

# function to recommend wine for a group 
def recommend_wine_for_group(group_id, group_data, merged_data):
    group_info = group_data[group_data['group_id'] == group_id].iloc[0]
    group_members = eval(group_info['group_members'])  
    recommendations = []

    # loop through each member of the group and get their favorite wine
    for user_id in group_members:
        wine_id, rating = recommend_wine_for_user(user_id, merged_data)
        recommendations.append({
            'group_id': group_id,
            'user_id': user_id,
            'wine_id': wine_id,
            'rating': rating if rating is not None else 'Suggested different wine'
        })

    recommendation_df = pd.DataFrame(recommendations)
    return recommendation_df


group_id = int(group_data.loc[:,"group_id"].sample().iloc[0]) 
result_df = recommend_wine_for_group(group_id, group_data, merged_data)
print(result_df)


  ratings_data = pd.read_csv('Dataset/last/XWines_Slim_150K_ratings.csv')


   group_id  user_id  wine_id  rating
0       170  1042330   179080     5.0
1       170  1809422   162497     4.5
2       170  1176344   111395     5.0
3       170  1253808   167418     4.0
4       170  1151770   111415     5.0
5       170  1393882   162514     4.0
6       170  1335852   111395     4.5
7       170  1023294   112875     5.0


In [56]:
# create average rating for each wine

ratings = {}
for index, row in ratings_data.iterrows():
    if row['WineID'] not in ratings:
        ratings[row['WineID']] = {"total": row['Rating'], "count": 1}
    else:
        ratings[row['WineID']]["total"] += row['Rating']
        ratings[row['WineID']]["count"] += 1

# add the averages to the df
wine_data["AvgRating"] = 0.0

for index, row in wine_data.iterrows():
    wine_data.loc[index, "AvgRating"] = ratings[row["WineID"]]["total"]/ratings[row["WineID"]]["count"]

In [57]:

from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import OneHotEncoder

# Function to find similar wines using KNN for a given wine
def find_knn_for_wine(wine_id, merged_data, user_id, k=10):
    # Extract relevant features 
    features = ['Type', 'Body'] 
    wine_features = merged_data[features]
    encoder = OneHotEncoder(sparse_output=False)
    encoded_wine_features = encoder.fit_transform(wine_features)

    # fit KNN model
    knn_model = NearestNeighbors(n_neighbors=k, metric='euclidean', radius=20)
    knn_model.fit(encoded_wine_features)

    # get the features of the wine with the specified wine_id
    target_wine = merged_data[merged_data['WineID'] == wine_id]

    if target_wine.empty:
        return f"No wine found with WineID {wine_id}."
    target_wine_features = target_wine[features]
    encoded_target_wine_features = encoder.transform(target_wine_features)

    # find K nearest wines to the target wine
    distances, indices = knn_model.kneighbors(encoded_target_wine_features)

    # get the recommended similar wines 
    recommended_wines = merged_data.iloc[indices[0]]
    recommended_wines = recommended_wines[recommended_wines['WineID'] != wine_id]
    recommended_list = recommended_wines[['WineID', 'Type', 'Body', 'Rating']].to_dict(orient='records')
    
    # get the wine that it eas recommended on
    original_wine = wine_data.where(wine_data['WineID'] == wine_id).dropna()
    original_wine_ratings = ratings_data.where((ratings_data['WineID'] == wine_id) & (ratings_data['UserID'] == user_id)).dropna().iloc[:]['Rating']
    if len(original_wine_ratings) == 0:
        original_wine_rating = None
    else:
        original_wine_rating = original_wine_ratings.iloc[0]
    if len(original_wine.iloc[:]["Type"]) == 0:
        rec_wine_type = None
    else:
        rec_wine_type = original_wine.iloc[0]["Type"]
    if len(original_wine.iloc[:]["Body"]) == 0:
        rec_wine_body = None
    else:
        rec_wine_body = original_wine.iloc[0]["Body"]
    recommended_list.append({"WineID": wine_id, "Type": rec_wine_type, "Body": rec_wine_body, "Rating": original_wine_rating})

    return recommended_list

# function to find KNN recommendations for all wines in the group recommendation
def recommend_similar_wines_for_group(result_df, merged_data, k=10):
    knn_recommendations = []

    for index, row in result_df.iterrows():
        wine_id = row['wine_id']
        user_id = row['user_id']
        knn_result = find_knn_for_wine(wine_id, merged_data, user_id, k)

        knn_recommendations.append({
            'user_id': int(user_id),
            'wine_id': int(wine_id),
            'knn_recommendations': knn_result
        })
    

    knn_recommendation_df = pd.DataFrame(knn_recommendations)
    return knn_recommendation_df

result_df = recommend_wine_for_group(group_id=group_id, group_data=group_data, merged_data=merged_data)
knn_recommendation_df = recommend_similar_wines_for_group(result_df, merged_data, k=10)
knn_recommendation_df


Unnamed: 0,user_id,wine_id,knn_recommendations
0,1042330,179080,"[{'WineID': 135885, 'Type': 'Red', 'Body': 'Ve..."
1,1809422,162497,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
2,1176344,111395,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
3,1253808,167418,"[{'WineID': 135885, 'Type': 'Red', 'Body': 'Ve..."
4,1151770,111415,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
5,1393882,162514,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
6,1335852,111395,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
7,1023294,112875,"[{'WineID': 113240, 'Type': 'Red', 'Body': 'Me..."


In [58]:
# each property gets a score from each user, based on the ordering. Then sum the scores of each characteristic and choose the wine that have those characteristics. If multiple wines, then choose based on the rating (todo)

In [59]:
def score_characteristics(recommendations):
    scores = {"Type": {}, "Body": {}}
    score = len(recommendations)
    c = 0
    for wine in recommendations:
        for key, value in wine.items():
            if key in ["WineID", "Rating"]:
                continue
            if key == "Type":
                if value not in scores["Type"]:
                    scores["Type"][value] = 1
                else:
                    scores["Type"][value] += 1
            elif key == "Body":
                if value not in scores["Body"]:
                    scores["Body"][value] = 1
                else:
                    scores["Body"][value] += 1
        c+=1
    
    return scores

category_weights_by_user = []
for index, row in knn_recommendation_df.iterrows():
    x = score_characteristics(row.knn_recommendations)
    category_weights_by_user.append(x)
    print(x)

{'Type': {'Red': 10}, 'Body': {'Very full-bodied': 10}}
{'Type': {'Red': 11}, 'Body': {'Full-bodied': 11}}
{'Type': {'Red': 9}, 'Body': {'Full-bodied': 9}}
{'Type': {'Red': 11}, 'Body': {'Very full-bodied': 11}}
{'Type': {'Red': 9}, 'Body': {'Full-bodied': 9}}
{'Type': {'Red': 11}, 'Body': {'Full-bodied': 11}}
{'Type': {'Red': 9}, 'Body': {'Full-bodied': 9}}
{'Type': {'Red': 11}, 'Body': {'Medium-bodied': 11}}


In [60]:
category_weights = {"Type": {}, "Body": {}}
for i in category_weights_by_user:
    for key, value in i["Type"].items():
        if key not in category_weights["Type"]:
            category_weights["Type"][key] = value
        else:
            category_weights["Type"][key] += value
        
    for key, value in i["Body"].items():
        if key not in category_weights["Body"]:
            category_weights["Body"][key] = value
        else:
            category_weights["Body"][key] += value

category_weights

{'Type': {'Red': 81},
 'Body': {'Very full-bodied': 21, 'Full-bodied': 49, 'Medium-bodied': 11}}

In [61]:
# order according to the scores
category_weights_sorted = {"Type": {}, "Body": {}}
for key, value in category_weights.items():
    category_weights_sorted[key] = dict(sorted(category_weights[key].items(), key=lambda item: item[1], reverse=True))

category_weights_sorted

{'Type': {'Red': 81},
 'Body': {'Full-bodied': 49, 'Very full-bodied': 21, 'Medium-bodied': 11}}

In [62]:
# select wine with top categories of each type
def get_wine_with_top_categories(category_weights_sorted=category_weights_sorted, top_type_index=1, top_body_index=1):
    top_type = list(category_weights_sorted["Type"].keys())[top_type_index-1]
    top_body = list(category_weights_sorted["Body"].keys())[top_body_index-1]
    
    # find a wine with these categories
    selection = wine_data.where((wine_data['Type'] == top_type) & (wine_data['Body'] == top_body)).dropna()
    
    if selection.shape[0] == 0:
        if top_type_index == top_body_index:
            selection = get_wine_with_top_categories(category_weights_sorted=category_weights_sorted, top_type_index=top_type_index+1, top_body_index=top_body_index)
        else:
            selection = get_wine_with_top_categories(category_weights_sorted=category_weights_sorted,top_type_index=top_type_index, top_body_index=top_body_index+1)
    
    selection.sort_values(by='AvgRating', ascending=False, inplace=True)

    return selection.head(1)

get_wine_with_top_categories()

Unnamed: 0,WineID,WineName,Type,Elaborate,Grapes,Harmonize,ABV,Body,Acidity,Code,Country,RegionID,RegionName,WineryID,WineryName,Website,Vintages,AvgRating
809,180330.0,IX Estate Red,Red,Assemblage/Blend,"['Cabernet Sauvignon', 'Cabernet Franc', 'Merl...","['Beef', 'Lamb', 'Game Meat', 'Poultry']",15.1,Full-bodied,Medium,US,United States,1848.0,Napa Valley,57081.0,Colgin,http://www.colgincellars.com,"[2021, 2020, 2019, 2018, 2017, 2016, 2015, 201...",4.728261


In [63]:
get_wine_with_top_categories()["WineID"]

809    180330.0
Name: WineID, dtype: float64

In [64]:
group_data

Unnamed: 0.1,Unnamed: 0,group_id,group_size,group_similarity,group_members,avg_similarity
0,0,0,2,random,"[1110381, 1212243]",-0.006469
1,1,1,2,random,"[1180876, 1006585]",-0.015370
2,2,2,2,random,"[1174271, 1159745]",-0.015015
3,3,3,2,random,"[1223585, 1150327]",0.105930
4,4,4,2,random,"[1822155, 1138489]",0.163505
...,...,...,...,...,...,...
235,235,235,8,similar_one_divergent,"[1174766, 1226880, 1059478, 1062080, 1039599, ...",0.376073
236,236,236,8,similar_one_divergent,"[1203902, 1166271, 1078943, 2056705, 1204723, ...",0.400792
237,237,237,8,similar_one_divergent,"[1198066, 1041042, 1086821, 1293899, 1134438, ...",0.248949
238,238,238,8,similar_one_divergent,"[1908334, 1714244, 1018208, 1054770, 1642793, ...",0.361038


In [65]:
#evaluate_recommender(knn_recommendation_df, group_data, ratings_data)

In [66]:
knn_recommendation_df

Unnamed: 0,user_id,wine_id,knn_recommendations
0,1042330,179080,"[{'WineID': 135885, 'Type': 'Red', 'Body': 'Ve..."
1,1809422,162497,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
2,1176344,111395,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
3,1253808,167418,"[{'WineID': 135885, 'Type': 'Red', 'Body': 'Ve..."
4,1151770,111415,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
5,1393882,162514,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
6,1335852,111395,"[{'WineID': 111431, 'Type': 'Red', 'Body': 'Fu..."
7,1023294,112875,"[{'WineID': 113240, 'Type': 'Red', 'Body': 'Me..."


In [67]:
group_data["group_members"][0]

'[1110381, 1212243]'

In [68]:
ratings_data

Unnamed: 0,RatingID,UserID,WineID,Vintage,Rating,Date
0,143,1356810,103471,1950,4.5,2021-11-02 20:52:59
1,199,1173759,111415,1951,5.0,2015-08-20 17:46:26
2,348,1164877,111395,1952,5.0,2020-11-13 05:40:26
3,374,1207665,111433,1953,5.0,2017-05-05 06:44:13
4,834,1075841,111431,1955,5.0,2016-09-14 20:18:38
...,...,...,...,...,...,...
149995,21013438,1000052,111468,N.V.,4.5,2021-12-22 21:03:51
149996,21013467,1180844,111461,N.V.,4.0,2017-04-23 21:07:55
149997,21013494,1218581,113690,N.V.,3.5,2019-04-14 17:45:08
149998,21013505,1106198,111468,N.V.,4.5,2021-07-10 07:00:15


In [69]:
import pandas as pd
import ast  # Import to safely parse string representation of lists

def get_wines_rated_by_group(group_df, group_id, ratings_df, wine_id=None):
    """
    Get all wines rated by a specific group based on group_id, with optional filtering for a specific wine_id.

    Parameters:
    group_df (pd.DataFrame): DataFrame containing group data with columns like ['group_id', 'group_members'].
    group_id (int): The ID of the group to analyze.
    ratings_df (pd.DataFrame): DataFrame containing ratings data with columns ['UserID', 'WineID', 'Rating'].
    wine_id (int, optional): The ID of a specific wine to filter. Defaults to None.

    Returns:
    pd.DataFrame: DataFrame containing all wines rated by users in the specified group, optionally filtered by the given WineID.
    """
    # Step 1: Extract the group_members string using the group_id
    group_row = group_df[group_df['group_id'] == group_id]

    # Step 2: Check if group_members list is found
    if group_row.empty:
        print(f"Group ID {group_id} not found.")
        return pd.DataFrame(columns=['UserID', 'WineID', 'Rating'])
    
    # Step 3: Convert the group_members string into an actual list
    group_members_str = group_row['group_members'].values[0]
    group_members = ast.literal_eval(group_members_str)  # Safely convert string to list
    
    # Step 4: Filter ratings_data for rows where UserID is in the group_members list
    filtered_ratings = ratings_df[ratings_df['UserID'].isin(group_members)]
    
    # Step 5: If wine_id is provided, filter further for that specific WineID
    if wine_id is not None:
        filtered_ratings = filtered_ratings[filtered_ratings['WineID'] == wine_id]

    # Step 6: Return the filtered DataFrame
    return filtered_ratings[['UserID', 'WineID', 'Rating']]

# Example Usage:
# Assuming you have `group_data` and `ratings_data` DataFrames defined
# result_df = get_wines_rated_by_group(group_data, 5, ratings_data, wine_id=111544)


In [70]:
get_wines_rated_by_group(group_data,5,ratings_data,wine_id=193478)

Unnamed: 0,UserID,WineID,Rating
5881,1078014,193478,4.5
