In [1]:
!pip install recombee-api-client pandas beautifulsoup4 unidecode




[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


# Import-uri si conectare la Recombee

In [2]:
import ast
import getpass
import os
import random

import pandas as pd
import unidecode
from bs4 import BeautifulSoup
from recombee_api_client.api_client import RecombeeClient, Region
from recombee_api_client.api_requests import *
from recombee_api_client.exceptions import ResponseException

# Connection settings.
# The private token is a secret, so it is taken from the environment (or asked
# for interactively) instead of being stored in the notebook.
# NOTE(review): the token that used to be hardcoded here should be treated as
# leaked and rotated in the Recombee admin UI.
DB_ID = os.environ.get('RECOMBEE_DB_ID', 'lex-boardgames')
PRIVATE_TOKEN = os.environ.get('RECOMBEE_PRIVATE_TOKEN') or getpass.getpass('Recombee private token: ')

client = RecombeeClient(DB_ID, PRIVATE_TOKEN, region=Region.EU_WEST)
print(f"Client configurat pentru baza de date: {DB_ID}")

Client configurat pentru baza de date: lex-boardgames


# Procesare locala si stergerea itemelor vechi din Recombee

In [3]:
print("--- PASUL 1: Procesare Locala ---")

# Mandatory cap on how many games are processed (and later uploaded).
MAX_ITEMS = 15000


def clean_html(text):
    """Strip HTML markup from a description and keep at most 2000 characters.

    Missing values (NaN/None) are returned as an empty string.
    """
    if pd.isna(text):
        return ""
    return BeautifulSoup(str(text), "html.parser").get_text()[:2000]


def parse_list(text):
    """Parse the textual form of a Python list (e.g. "['A', 'B']") into a list.

    Returns an empty list when the value cannot be parsed (NaN, free text,
    malformed literal) or when it parses to something that is not a
    list/tuple/set, so callers always receive a list.
    """
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # These are the errors ast.literal_eval is documented to raise.
        return []
    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)
    return []


try:
    df = pd.read_csv('boardgames1.csv')
    df = df.iloc[:MAX_ITEMS].copy()

    df['description'] = df['description'].apply(clean_html)
    df['boardgamemechanic'] = df['boardgamemechanic'].apply(parse_list)
    df['boardgamecategory'] = df['boardgamecategory'].apply(parse_list)

    print(f"Date locale pregatite: {len(df)} jocuri.")

except FileNotFoundError:
    print("EROARE: Nu gasesc boardgames1.csv")
    # An empty frame lets the following steps detect that nothing was loaded.
    df = pd.DataFrame()

if not df.empty:
    print("\n--- PASUL 2: Stergere Iteme Vechi de pe Server ---")
    print("Preluare lista iteme existente...")
    try:
        # Page through the item list so that every existing ID is collected,
        # not only the first 15000 returned by a single call.
        page_size = 1000
        ids_to_delete = []
        while True:
            page = client.send(ListItems(count=page_size, offset=len(ids_to_delete)))
            ids_to_delete.extend(page)
            if len(page) < page_size:
                break

        if ids_to_delete:
            print(f"Am gasit {len(ids_to_delete)} iteme vechi. Le sterg...")

            # Delete in batches of at most 1000 requests each.
            for start in range(0, len(ids_to_delete), page_size):
                chunk = ids_to_delete[start:start + page_size]
                client.send(Batch([DeleteItem(item_id) for item_id in chunk]))

            print("Serverul a fost golit de jocurile vechi.")
        else:
            print("Serverul este deja gol.")

    except ResponseException as e:
        print(f"Eroare la stergere: {e}")

--- PASUL 1: Procesare Locala ---
Date locale pregatite: 15000 jocuri.

--- PASUL 2: Stergere Iteme Vechi de pe Server ---
Preluare lista iteme existente...
Am gasit 15000 iteme vechi. Le sterg...
Serverul a fost golit de jocurile vechi.


# Definirea structurii bazei de date din Recombee

In [4]:
print("Definirea structurii bazei de date...")

# Item properties as (property name, Recombee type) pairs.
properties = [
    ('name', 'string'),
    ('categories', 'set'),
    ('mechanics', 'set'),
    ('description', 'string'),
    ('average', 'double'),
    ('yearpublished', 'int'),
    ('minplayers', 'int'),
    ('maxplayers', 'int'),
    ('minplaytime', 'int'),
    ('minage', 'int')
]

for prop_name, prop_type in properties:
    try:
        client.send(AddItemProperty(prop_name, prop_type))
        print(f"Proprietate creata/verificata: {prop_name}")
    except ResponseException as e:
        # 409 means the property already exists, which is expected on re-runs.
        # Any other status is a real problem and must not be hidden.
        if getattr(e, 'status_code', None) == 409:
            print(f"Proprietate existenta: {prop_name}")
        else:
            print(f"Eroare la crearea proprietatii {prop_name}: {e}")

print("Structura este gata.")

Definirea structurii bazei de date...
Structura este gata.


# Incarcarea jocurilor
Am observat ca exista in baza de date anumite categorii care nu sunt relevante pentru sistemul de recomandare, cum ar fi nominalizari. Acestea vor fi filtrate in asa fel incat sa avem doar atributele relevante pentru recomandari in functie de gameplay si strategie.

In [5]:
import pandas as pd
from recombee_api_client.api_requests import SetItemValues, Batch

# Substrings (matched case-insensitively) that mark a category label as noise
# for recommendation purposes: awards, nominations, editions, publishers.
BANNED_KEYWORDS = [
    'Nominee',
    'Winner',
    'Recommended',
    'Prize',
    'Award',
    'Place',
    'Edition',
    'Games',
    'Best',
    'Spill',
    'Ludo',
    'Jahres',
    'Voters',
    'Pubblico',
    'International',
    'Meeples',
    'Golden Geek',
    'Deutscher Spiele',
    'Hit mit Freunden',
    'Gouden',
    'Årets',
]


def get_clean_categories(raw_list):
    """Return only the gameplay-relevant labels from a raw category list.

    Example: ['Economic', '2008 Winner', '(Unknown)'] -> ['Economic'].

    Each entry is converted to a string and stripped, then dropped when it:
      * is at most two characters long,
      * starts with '(' (e.g. "(Public Domain)", "(Unknown)"),
      * starts with a digit (e.g. a year such as "2008 ..."),
      * contains any BANNED_KEYWORDS entry, ignoring case.

    Anything that is not a list yields an empty list.
    """
    if not isinstance(raw_list, list):
        return []

    def keep(label):
        # The length check comes first so that label[0] is always safe below.
        if len(label) <= 2 or label.startswith('(') or label[0].isdigit():
            return False
        lowered = label.lower()
        return not any(word.lower() in lowered for word in BANNED_KEYWORDS)

    stripped = (str(entry).strip() for entry in raw_list)
    return [label for label in stripped if keep(label)]

def build_item_values(row, categories):
    """Build the Recombee property dict for one game row.

    Args:
        row: a DataFrame row (pandas Series) for a single game.
        categories: the already-cleaned category list to store for the game.

    Returns:
        A dict keyed by the item property names. Missing numeric values
        (NaN/None) are stored as 0 / 0.0, because NaN is not valid JSON.
    """
    def as_int(value):
        return int(value) if pd.notna(value) else 0

    average = row.get('average', 0.0)
    return {
        'name': str(row['name']),
        'categories': categories,
        'mechanics': row['boardgamemechanic'],
        'description': str(row.get('description', '')),
        # Guarded the same way as the integer fields below.
        'average': float(average) if pd.notna(average) else 0.0,
        'yearpublished': as_int(row['yearpublished']),
        'minplayers': as_int(row['minplayers']),
        'maxplayers': as_int(row['maxplayers']),
        'minplaytime': as_int(row['minplaytime']),
        'minage': as_int(row['minage']),
    }


# Upload
if 'df' not in locals() or df.empty:
    print("EROARE: DataFrame gol. Rulează Celula 2.")
else:
    print(f"Încep upload-ul CURAT pentru {len(df)} jocuri...")
    requests = []
    count = 0
    total = len(df)

    def send_pending(pending, sent_so_far):
        """Send one batch and report progress; returns the updated counter."""
        client.send(Batch(pending))
        sent_so_far += len(pending)
        print(f"Progres: {sent_so_far} / {total} jocuri curățate și salvate...", end='\r')
        return sent_so_far

    for index, row in df.iterrows():
        item_id = str(row['objectid'])

        # Only the filtered categories are uploaded.
        clean_cats = get_clean_categories(row['boardgamecategory'])
        values = build_item_values(row, clean_cats)

        requests.append(SetItemValues(item_id, values, cascade_create=True))

        if len(requests) >= 1000:
            count = send_pending(requests, count)
            requests = []

    # Send the final partial batch and include it in the progress counter.
    if requests:
        count = send_pending(requests, count)
        requests = []

    print(f"\n Baza de date a fost actualizata cu categorii curate")

Încep upload-ul CURAT pentru 15000 jocuri...
Progres: 15000 / 15000 jocuri curățate și salvate...
 Baza de date a fost actualizata cu categorii curate


# Incarcarea utilizatorilor

In [6]:
print("Procesare utilizatori...")
try:
    df_users = pd.read_csv('people.csv')
except FileNotFoundError:
    # Report the missing file (as the games cell does) so the placeholder user
    # below is not mistaken for real data.
    print("ATENTIE: Nu gasesc people.csv - folosesc un utilizator de test.")
    df_users = pd.DataFrame({'UserId': ['u1'], 'Name': ['Test User']})

def generate_email(name):
    """Build a fictitious e-mail address from a person's name.

    The name is transliterated to ASCII, lower-cased and split on whitespace:
      * two or more words -> "<first>.<last>@fictiv.ro"
      * a single word     -> "<word>@fictiv.ro"
      * missing or blank  -> "unknown@fictiv.ro"
    """
    if pd.isna(name):
        return "unknown@fictiv.ro"
    parts = unidecode.unidecode(str(name)).lower().split()
    if not parts:
        # Blank / whitespace-only names would otherwise give "@fictiv.ro".
        return "unknown@fictiv.ro"
    if len(parts) >= 2:
        return f"{parts[0]}.{parts[-1]}@fictiv.ro"
    # Use the split word so surrounding whitespace never ends up in the address.
    return f"{parts[0]}@fictiv.ro"

# Use the 'Name' column when present, otherwise fall back to the second column.
col_name = 'Name' if 'Name' in df_users.columns else df_users.columns[1]
df_users['email'] = df_users[col_name].apply(generate_email)

# Define the user properties. A 409 response means the property already exists
# (expected on re-runs); anything else is reported instead of being swallowed.
for user_prop in ('email', 'name'):
    try:
        client.send(AddUserProperty(user_prop, 'string'))
    except ResponseException as e:
        if getattr(e, 'status_code', None) != 409:
            print(f"Eroare la crearea proprietatii {user_prop}: {e}")

# Upload users in batches of at most 1000 requests.
reqs = []
for idx, row in df_users.iterrows():
    # Without a 'UserId' column the ID is derived from the row index.
    uid = str(row.get('UserId', f"user_{idx}"))
    vals = {'name': str(row[col_name]), 'email': row['email']}
    reqs.append(SetUserValues(uid, vals, cascade_create=True))

    if len(reqs) >= 1000:
        client.send(Batch(reqs))
        reqs = []
if reqs:
    client.send(Batch(reqs))

print(f"{len(df_users)} useri incarcati.")

Procesare utilizatori...
33 useri incarcati.


# Generare rating-uri

In [7]:
print("Generare rating-uri...")

RATINGS_PER_USER = 20  # number of random games each user rates
RATING_SEED = 42       # fixed seed so the synthetic ratings are reproducible

# A dedicated generator keeps the global `random` state untouched.
rating_rng = random.Random(RATING_SEED)

all_game_ids = df['objectid'].astype(str).tolist()
all_user_ids = [str(row.get('UserId', f"user_{idx}")) for idx, row in df_users.iterrows()]

reqs = []
for uid in all_user_ids:
    # Sample without replacement; when fewer games than RATINGS_PER_USER exist,
    # every game is rated.
    sample_size = min(RATINGS_PER_USER, len(all_game_ids))
    selected = rating_rng.sample(all_game_ids, sample_size)

    for gid in selected:
        # Map a 1-10 score linearly onto the -1.0 .. 1.0 rating range.
        val = (rating_rng.uniform(1.0, 10.0) - 1) / 9 * 2 - 1
        reqs.append(AddRating(uid, gid, val, cascade_create=True))

    # Flush once per user when at least 1000 requests are pending.
    if len(reqs) >= 1000:
        client.send(Batch(reqs))
        reqs = []

if reqs:
    client.send(Batch(reqs))

print("Rating-uri trimise.")

Generare rating-uri...
Rating-uri trimise.


# Recomandari pentru un user existent

In [8]:
import random

print("--- DEMO 1: Recomandari Personalizate (User Existent) ---")

# Derive the IDs exactly as the upload cell does ('UserId' column when present,
# otherwise "user_<row index>"), so the chosen user is one that was uploaded.
known_user_ids = [str(row.get('UserId', f"user_{idx}")) for idx, row in df_users.iterrows()]

if not known_user_ids:
    print("EROARE: Nu exista utilizatori incarcati.")
else:
    random_user_id = random.choice(known_user_ids)

    print(f"Generam recomandari pentru: {random_user_id}")

    try:
        # Ask for 5 recommendations together with the item properties.
        recommended = client.send(RecommendItemsToUser(random_user_id, 5, return_properties=True))

        print(f"Top 5 jocuri recomandate pentru {random_user_id}:")
        for rec in recommended['recomms']:
            vals = rec['values']
            print(f"   - {vals['name']}")
            print(f"     Categorii: {vals['categories']}")
            print(f"     Nota medie globala: {vals['average']}")
            print("     --------------------------------")

    except ResponseException as e:
        print(f"Eroare: {e}")

--- DEMO 1: Recomandari Personalizate (User Existent) ---
Generam recomandari pentru: user_21
Top 5 jocuri recomandate pentru user_21:
   - Hemloch  Midnight Edition
     Categorii: ['Card Game', 'Fantasy']
     Nota medie globala: 7.02222
     --------------------------------
   - Caverna  The Cave Farmers
     Categorii: ['Animals', 'Economic', 'Farming', 'Fantasy']
     Nota medie globala: 8.0456
     --------------------------------
   - Saipan  The Bloody Rock
     Categorii: ['Wargame', 'World War II']
     Nota medie globala: 8.28218
     --------------------------------
   - Holdfast  EastFront 1941-45
     Categorii: ['Wargame', 'World War II']
     Nota medie globala: 7.854430000000001
     --------------------------------
   - A Touch of Evil  The Supernatural Game
     Categorii: ['Horror', 'Fighting', 'Murder/Mystery', 'Adventure']
     Nota medie globala: 6.9638100000000005
     --------------------------------


# Cautare cu filtre multiple alese de un user nou (abordarea cold start)

In [10]:
import pandas as pd
from recombee_api_client.api_requests import SetUserValues, ListItems
from recombee_api_client.exceptions import ResponseException

print("--- DEMO AVANSAT: Filtrare Completa ---")

# --- 1. Category menu ---
# Built with the same get_clean_categories() filter used at upload time, so the
# menu only offers categories that are actually stored on the items. (A
# separate, shorter banned-word list used here previously let through labels
# that the upload had removed.)
clean_categories_set = set()

if 'df' in locals() and 'boardgamecategory' in df.columns:
    for cats in df['boardgamecategory']:
        clean_categories_set.update(get_clean_categories(cats))
    sorted_cats = sorted(clean_categories_set)
else:
    # Fallback menu when the games DataFrame is not available.
    sorted_cats = ['Economic', 'Fantasy', 'Wargame', 'Card Game']

def advanced_search_app(user_id):
    """Interactive search demo: ask for preferences and list matching games.

    Prompts for preferred categories, the youngest player's age and the number
    of players, combines the answers into one ReQL filter (categories OR-ed
    together, the three criteria AND-ed) and prints up to 5 items returned by
    ListItems for that filter.

    Args:
        user_id: ID of the user to create (if needed) and greet.

    Recombee errors are printed, not raised.
    """
    def read_optional_int(prompt):
        """Return the typed whole number, or None for blank/invalid input."""
        raw = input(prompt).strip()
        # isdecimal() accepts only characters int() can parse, unlike isdigit().
        return int(raw) if raw.isdecimal() else None

    # A. Create the user (best effort: a failure here does not stop the search).
    try:
        client.send(SetUserValues(user_id, {'name': 'Advanced Tester'}, cascade_create=True))
    except ResponseException as e:
        print(f"Eroare Recombee: {e}")

    print(f"\nSalut, {user_id}!")
    print(f"Categorii disponibile: {', '.join(sorted_cats[:10])}...")

    # --- B. Read the inputs ---
    raw_cat = input("\n1. Ce categorii preferi? (ex: Economic, Wargame): ")
    sel_cats = [x.strip() for x in raw_cat.split(',') if x.strip()]

    target_age = read_optional_int("2. Care este varsta celui mai tanar jucator? (scrie numar sau lasa gol): ")
    target_players = read_optional_int("3. Cati jucatori sunteti? (scrie numar sau lasa gol): ")

    # --- C. Build the ReQL filter ---
    filter_parts = []

    # Part 1: categories, matched case-insensitively against the known list and
    # OR-ed together. The first known spelling wins for a given lower-case key.
    canonical = {}
    for db_cat in sorted_cats:
        canonical.setdefault(db_cat.lower(), db_cat)
    matched_cats = [canonical[c.lower()] for c in sel_cats if c.lower() in canonical]

    if matched_cats:
        cat_filters = [f'"{cat}" in \'categories\'' for cat in matched_cats]
        # Parenthesised because the OR group is AND-ed with the other parts.
        filter_parts.append(f"({' or '.join(cat_filters)})")
        # Show what is really filtered on, not the raw (possibly unknown) input.
        print(f"   -> Filtru Categorii: {matched_cats}")
    elif raw_cat:
        print("   -> Atentie: Nu am recunoscut categoriile scrise.")

    # Part 2: age (item 'minage' <= the given age). `is not None` so that an
    # explicit 0 is applied instead of being treated as "no answer".
    if target_age is not None:
        filter_parts.append(f"'minage' <= {target_age}")
        print(f"   -> Filtru Varsta: Jocuri pentru {target_age}+ ani")

    # Part 3: players (minplayers <= X <= maxplayers).
    if target_players is not None:
        filter_parts.append(f"'minplayers' <= {target_players} and 'maxplayers' >= {target_players}")
        print(f"   -> Filtru Jucatori: Suporta {target_players} persoane")

    # Final assembly with AND.
    if not filter_parts:
        print("Nu ai setat niciun filtru. Iti arat jocuri populare random.")
        final_query = None
    else:
        final_query = " and ".join(filter_parts)
        print(f"\nQUERY FINAL RECOMBEE: {final_query}")

    # --- D. Execute ---
    try:
        res = client.send(ListItems(
            filter=final_query,
            count=5,
            return_properties=True
        ))

        # Accept either a plain list of items or a dict carrying 'recomms'.
        items = res if isinstance(res, list) else res.get('recomms', [])

        print(f"\n RECOMANDARI")
        if not items:
            print("Nu s-au gasit jocuri care sa respecte TOATE conditiile.")

        for item in items:
            val = item.get('values', item)
            name = val.get('name')
            cats = val.get('categories')
            # The filtered fields are printed so the filters can be checked.
            min_age = val.get('minage')
            players = f"{val.get('minplayers')}-{val.get('maxplayers')}"

            print(f"• {name}")
            print(f"  Categorii: {cats}")
            print(f"  Varsta Min: {min_age} | Jucatori: {players}")
            print(f"  Nota Medie: {val.get('average')}")
            print("-" * 30)

    except ResponseException as e:
        print(f"Eroare Recombee: {e}")

# Run the interactive search demo (prompts for input when executed).
advanced_search_app("User_Avansat")

--- DEMO AVANSAT: Filtrare Completa ---

Salut, User_Avansat!
Categorii disponibile: ADC Blackfire Entertainment, Abstract Strategy, Action / Dexterity, Adventure, Age of Reason, American Civil War, American Indian Wars, American Revolutionary War, American West, Ancient...
   -> Filtru Categorii: ['Economic', 'Animals']
   -> Filtru Varsta: Jocuri pentru 12+ ani
   -> Filtru Jucatori: Suporta 3 persoane

QUERY FINAL RECOMBEE: ("Economic" in 'categories' or "Animals" in 'categories') and 'minage' <= 12 and 'minplayers' <= 3 and 'maxplayers' >= 3

 RECOMANDARI
• Tripolo
  Categorii: ['Educational', 'Childrens Game', 'Animals']
  Varsta Min: 6 | Jucatori: 2-4
  Nota Medie: 6.611110000000001
------------------------------
• My Happy Farm
  Categorii: ['Card Game', 'Childrens Game', 'Farming', 'Animals']
  Varsta Min: 8 | Jucatori: 2-4
  Nota Medie: 6.462269999999999
------------------------------
• Axis & Allies
  Categorii: ['Wargame', 'World War II', 'Economic']
  Varsta Min: 12 | Jucat