In [1]:
import pandas as pd

data = {
    "User": ["A", "A", "A", "B", "B", "B", "C", "C", "C"],
    "Item": [
        "Oats",
        "Almond Milk",
        "Banana",
        "Chips",
        "Soda",
        "Cookies",
        "Apple",
        "Yogurt",
        "Granola",
    ],
    "Category": [
        "Healthy",
        "Healthy",
        "Fruits",
        "Snacks",
        "Drinks",
        "Snacks",
        "Fruits",
        "Dairy",
        "Healthy",
    ],
}

df = pd.DataFrame(data)

df


Unnamed: 0,User,Item,Category
0,A,Oats,Healthy
1,A,Almond Milk,Healthy
2,A,Banana,Fruits
3,B,Chips,Snacks
4,B,Soda,Drinks
5,B,Cookies,Snacks
6,C,Apple,Fruits
7,C,Yogurt,Dairy
8,C,Granola,Healthy


In [2]:
pivot_table = pd.crosstab(df['User'], df['Item'])

pivot_table


Item,Almond Milk,Apple,Banana,Chips,Cookies,Granola,Oats,Soda,Yogurt
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A,1,0,1,0,0,0,1,0,0
B,0,0,0,1,1,0,0,1,0
C,0,1,0,0,0,1,0,0,1


In [3]:
from sklearn.metrics.pairwise import cosine_similarity

similarity = cosine_similarity(pivot_table)

similarity_df = pd.DataFrame(
    similarity,
    index=pivot_table.index,
    columns=pivot_table.index
)

similarity_df


User,A,B,C
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,1.0,0.0,0.0
B,0.0,1.0,0.0
C,0.0,0.0,1.0


In [5]:
def recommend_items(user):
    similar_users = similarity_df[user].sort_values(ascending=False)[1:]

    recommended_items = set()

    for sim_user in similar_users.index:
        items = pivot_table.loc[sim_user]
        items_bought = items[items > 0].index
        recommended_items.update(items_bought)

    user_items = pivot_table.loc[user]
    already_bought = user_items[user_items > 0].index

    return list(recommended_items - set(already_bought))



In [6]:
recommend_items("A")


['Yogurt', 'Cookies', 'Granola', 'Apple', 'Soda', 'Chips']