<a href="https://colab.research.google.com/github/dashatenoff/recsys-vk/blob/main/norebooks/item_based_cf_jaccard_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Item-based Collaborative Filtering (Jaccard)

This notebook implements a simple item-based collaborative filtering
recommender system using Jaccard similarity.

The model is based on implicit user-item interactions (views) from the VK-LSVD dataset.
The implementation focuses on clarity and conceptual correctness rather than efficiency.


In [None]:
import os

import pandas as pd
from google.colab import drive

# Mount Google Drive so files stored there are reachable under /content/drive.
drive.mount('/content/drive')

# Directory on the mounted drive that holds the train/test parquet files.
DATA_DIR = '/content/drive/MyDrive/VK'

# Interaction tables used by the rest of the notebook.
train = pd.read_parquet(os.path.join(DATA_DIR, 'train.parquet'))
test = pd.read_parquet(os.path.join(DATA_DIR, 'test.parquet'))

## Item similarity function

In [2]:
# Inverted indexes built from the training interactions:
#   user_items: user_id -> set of item_ids that occur with that user
#   item_users: item_id -> set of user_ids that occur with that item
interactions_by_user = train.groupby('user_id')
interactions_by_item = train.groupby('item_id')

user_items = interactions_by_user['item_id'].apply(set)
item_users = interactions_by_item['user_id'].apply(set)

def jaccard(item1, item2):
  """Return the Jaccard similarity of two items based on their user sets.

  Each item's users are looked up in the module-level ``item_users``
  mapping; an item missing from the mapping is treated as having no users.

  Returns:
    ``len(A & B) / len(A | B)`` for the two user sets, or ``0.0`` when
    either set is empty.
  """
  audience_a = item_users.get(item1, set())
  audience_b = item_users.get(item2, set())

  # With at least one empty audience there is nothing to compare.
  if audience_a and audience_b:
    shared = audience_a & audience_b
    combined = audience_a | audience_b
    return len(shared) / len(combined)

  return 0.0


## Recommendation function for a SINGLE user

In [5]:
def recommend_for_user(user_id, user_items, item_users, k=10):
  """Recommend up to ``k`` unseen items for one user via item-based Jaccard CF.

  For every item the user has interacted with, the Jaccard similarity to
  every other item is computed from the user sets in ``item_users`` and
  accumulated per candidate item. Candidates are ranked by their summed
  similarity, highest first.

  Args:
    user_id: Key identifying the user in ``user_items``.
    user_items: Mapping user -> set of items; must support ``.get``
      (e.g. a dict or a pandas Series of sets).
    item_users: Mapping item -> set of users; must support ``.get`` and
      ``.items()``. Similarities are computed from this argument, not from
      any module-level variable.
    k: Maximum number of recommendations to return.

  Returns:
    A list of at most ``k`` item ids the user has not seen. An unknown user
    or a user with no items yields ``[]``, as does ``k <= 0``. Items with no
    user overlap still get a score of 0 and may fill the remaining slots.
  """
  if k <= 0:
    # A negative k would otherwise slice off the tail of the ranking.
    return []

  seen_items = user_items.get(user_id, set())
  scores = {}

  for item in seen_items:
    # The seen item's audience is invariant across the inner loop.
    item_audience = item_users.get(item, set())

    for other_item, other_audience in item_users.items():
      # Skip before computing anything: seen items are never recommended.
      if other_item in seen_items:
        continue

      if item_audience and other_audience:
        overlap = len(item_audience & other_audience)
        # |A ∪ B| = |A| + |B| - |A ∩ B|; avoids building the union set.
        sim = overlap / (len(item_audience) + len(other_audience) - overlap)
      else:
        sim = 0.0

      scores[other_item] = scores.get(other_item, 0) + sim

  # sorted() is stable, so ties keep the iteration order of item_users.
  ranked_items = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)

  return [candidate for candidate, _ in ranked_items[:k]]


In [6]:
# Smoke-test the recommender on the first user id in the user -> items index.
known_user_ids = list(user_items.keys())
test_user_id = known_user_ids[0]
recommend_for_user(user_id=test_user_id, user_items=user_items, item_users=item_users, k=10)


[329362081,
 178221643,
 225067555,
 377181597,
 434582233,
 31692283,
 319579439,
 547469448,
 26828817,
 498328655]