In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
# Load the event log that the feedback matrices are built from.
df = pd.read_csv("filesToProcess/_resultType_withoutContext.csv")
# Disabled filter: would drop rows whose contextvalue is one of "0".."6".
# df = df.loc[~df.contextvalue.isin(["1","2","3","4","5","6","0"]), :]
# Share of the combined score taken from explicit ratings; the implicit
# (interaction-based) score gets the remaining 1 - weightExplicit.
weightExplicit = 0.30

In [4]:
def generate_vector(df, users, weight_explicit=None):
    """Build a user x service preference matrix from implicit and explicit feedback.

    Rows of ``df`` whose ``user`` is in ``users`` are split by ``type``:

    * ``"INTERACTION"`` rows give the implicit score: each user's summed
      ``value`` per service divided by that user's summed ``value`` over all
      services.
    * ``"RATING"`` rows give the explicit score: the mean ``value`` per
      (user, service), rounded, then min-max scaled to ``[-0.01, 1]``. The
      scaler is fitted on the selected users only, so the scale depends on
      which ``users`` are passed.

    (user, service) pairs with no data are 0 in either matrix. If the selected
    users have no ``"RATING"`` rows the explicit matrix is all zeros.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user``, ``service``, ``type`` and ``value``.
    users : list-like
        User identifiers to keep.
    weight_explicit : float, optional
        Weight given to the explicit matrix; the implicit matrix gets
        ``1 - weight_explicit``. When omitted, the notebook-level
        ``weightExplicit`` is used.

    Returns
    -------
    pandas.DataFrame
        Indexed by user, one column per service, holding
        ``weight_explicit * explicit + (1 - weight_explicit) * implicit``.
    """
    if weight_explicit is None:
        # Backward-compatible default: the constant defined in the config cell.
        weight_explicit = weightExplicit

    feedback_columns = ["user", "service", "value"]
    df_users = df.loc[df["user"].isin(users), :]
    interactions = df_users.loc[df_users["type"] == "INTERACTION", feedback_columns]
    ratings = df_users.loc[df_users["type"] == "RATING", feedback_columns]

    # --- Implicit rating: share of a user's interactions spent on each service.
    # Select the column explicitly; the first positional argument of
    # GroupBy.sum() is `numeric_only`, not a column name.
    grouped_interactions = (
        interactions.groupby(["user", "service"])["value"].sum().reset_index()
    )
    interactions_per_user = grouped_interactions.groupby("user")["value"].sum()
    matrix_user_rating = grouped_interactions.pivot(
        index="user", columns="service", values="value"
    ).fillna(0)
    matrix_implicit_rating = matrix_user_rating.div(interactions_per_user, axis=0)

    # --- Explicit rating: rounded mean rating, min-max normalised.
    if ratings.empty:
        # MinMaxScaler cannot be fitted on zero samples; with no ratings the
        # explicit contribution is simply zero everywhere.
        matrix_explicit_rating = pd.DataFrame(
            0.0,
            index=matrix_implicit_rating.index,
            columns=matrix_implicit_rating.columns,
        )
    else:
        grouped_ratings = (
            ratings.groupby(["user", "service"])["value"]
            .mean()
            .round()
            .rename("mean")
            .reset_index()
        )
        # The lower bound is -0.01 rather than 0, so the lowest observed rating
        # does not collide with the 0 used for unrated (user, service) pairs.
        scaler = MinMaxScaler(feature_range=(-0.01, 1))
        grouped_ratings["value_normalized"] = scaler.fit_transform(
            grouped_ratings[["mean"]]
        ).ravel()
        matrix_explicit_rating = grouped_ratings.pivot(
            index="user", columns="service", values="value_normalized"
        ).fillna(0)

    # Align both matrices on the union of users and services before blending.
    index_labels = matrix_implicit_rating.index.union(matrix_explicit_rating.index)
    column_labels = matrix_implicit_rating.columns.union(matrix_explicit_rating.columns)
    matrix_implicit_rating = matrix_implicit_rating.reindex(
        index=index_labels, columns=column_labels, fill_value=0
    )
    matrix_explicit_rating = matrix_explicit_rating.reindex(
        index=index_labels, columns=column_labels, fill_value=0
    )

    # Weighted blend of explicit and implicit feedback.
    combined_matrix = (weight_explicit * matrix_explicit_rating) + (
        (1 - weight_explicit) * matrix_implicit_rating
    )
    return combined_matrix

In [5]:
# Build the blended feedback matrix for a hand-picked set of four users.
combined_matrix = generate_vector(
    df,
    ["OCUnH", "I3RXU", "ws0cho3", "gKUuuzg9"],
)
print(combined_matrix)

service   arabiar tea  capuccino  chocolate  chocolate_milk  cortado  \
user                                                                   
I3RXU           0.000     0.0000   0.000000            0.00   0.0000   
OCUnH          -0.003     0.1485   0.000000            0.14  -0.0030   
gKUuuzg9        0.000     0.0000   0.130128            0.00   0.0000   
ws0cho3         0.000     0.0000   0.000000            0.00   0.1485   

service   decaffeinated  expresso  hazelnut_cappuccino  light_coffee  \
user                                                                   
I3RXU             0.000  0.000000             0.000000      0.000000   
OCUnH            -0.003  0.720000            -0.003000     -0.003000   
gKUuuzg9          0.000  0.000000             0.000000      0.004487   
ws0cho3          -0.003  0.466667             0.381833      0.000000   

service   long_coffee  long_decaffeinated  milk_coffee  
user                                                    
I3RXU           0.00

In [87]:
# Step 1: Calculate User Similarity
# Pairwise cosine similarity between the user rows of combined_matrix; the
# result is positional, so row/column i corresponds to combined_matrix.index[i].
user_similarity = cosine_similarity(X=combined_matrix)
print(user_similarity)

[[1.         0.18006309 0.97449224 0.23257472]
 [0.18006309 1.         0.17543959 0.73026063]
 [0.97449224 0.17543959 1.         0.22664226]
 [0.23257472 0.73026063 0.22664226 1.        ]]


In [88]:
# Step 2: Find Similar Users
k = 3
target_user = 'OCUnH'
# Row position of the target user, shared by combined_matrix and user_similarity.
target_user_index = combined_matrix.index.get_loc(target_user)
# Sort the target's similarity row ascending, reverse it, and keep the first k
# positions (most similar first).
# NOTE(review): the target user itself is part of the ranking (self-similarity
# is the maximum), so it occupies one of the k slots — confirm this is intended.
k_similar_users_indices = (
    user_similarity[target_user_index]
    .argsort()[::-1][:k]
)
print(k_similar_users_indices)


[1 3 0]


In [125]:
from sklearn.neighbors import NearestNeighbors

k = 3  # Number of nearest neighbours to retrieve
user_index = "OCUnH"  # Label (not position) of the target user in combined_matrix

# Brute-force cosine-distance index over every user row.
knn = NearestNeighbors(
    n_neighbors=k,
    metric='cosine',
    algorithm="brute",
)
knn.fit(combined_matrix)

# Query with the target user's own row, selected through a boolean mask on the
# "user" index level. `indices` holds row positions within combined_matrix.
distances, indices = knn.kneighbors(
    combined_matrix.loc[
        combined_matrix.index.get_level_values("user") == user_index,
        :,
    ]
)
print(distances, indices)

[[1.11022302e-16 2.69739365e-01 8.19936909e-01]] [[1 3 0]]


In [126]:
# Pull the full rating rows of the neighbours found by the kNN query. Every
# service column is kept, including services the users have already seen.
neighbor_ratings = combined_matrix.iloc[indices.ravel()]
print(neighbor_ratings)


service  arabiar tea  capuccino  chocolate  chocolate_milk  cortado  \
user                                                                  
OCUnH         -0.003     0.1485        0.0            0.14  -0.0030   
ws0cho3        0.000     0.0000        0.0            0.00   0.1485   
I3RXU          0.000     0.0000        0.0            0.00   0.0000   

service  decaffeinated  expresso  hazelnut_cappuccino  light_coffee  \
user                                                                  
OCUnH           -0.003  0.720000            -0.003000        -0.003   
ws0cho3         -0.003  0.466667             0.381833         0.000   
I3RXU            0.000  0.000000             0.000000         0.000   

service  long_coffee  long_decaffeinated  milk_coffee  
user                                                   
OCUnH         -0.003              -0.003       0.1370  
ws0cho3        0.000               0.000       0.1485  
I3RXU          0.000               0.000       1.0000  


In [121]:
# Score each service by its mean rating across the selected neighbour rows.
recommendations = neighbor_ratings.mean(axis=0)
# Sort the recommendations in descending order.
# Work on the raw values so `sorted_indices` is a plain array of integer
# positions (ascending argsort, reversed).
sorted_indices = recommendations.to_numpy().argsort()[::-1]
# `recommendations` is labelled by service name, so positions must be looked up
# with .iloc. Passing integers to Series[...] only works through a deprecated
# positional fallback and is treated as a label lookup in newer pandas.
sorted_recommendations = recommendations.iloc[sorted_indices]
print(sorted_recommendations)
# Retrieve the service names corresponding to the sorted positions
column_labels = combined_matrix.columns
service_names = column_labels[sorted_indices]

# (service name, mean score) pairs, best first
recommended_services = list(zip(service_names, sorted_recommendations))

service
expresso               0.395556
milk_coffee            0.283628
hazelnut_cappuccino    0.126278
capuccino              0.049500
cortado                0.048500
chocolate_milk         0.046667
chocolate              0.043376
light_coffee           0.000496
long_decaffeinated    -0.001000
long_coffee           -0.001000
arabiar tea           -0.001000
decaffeinated         -0.002000
dtype: float64
