In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import joblib

# Training script for a user-based collaborative-filtering model:
# load ratings, filter to active users and popular books, build a
# User x ISBN rating matrix, compute user-to-user cosine similarity,
# and persist (pivot, similarity) with joblib.

# --- Configuration ---
# Input ratings file and output model artifact.
RATINGS_CSV = "raw_data/Ratings.csv"
MODEL_PATH = 'book_model.joblib'
# A user / a book is kept only when it has strictly more than this many ratings.
MIN_USER_RATINGS = 50
MIN_BOOK_RATINGS = 50

# Load the CSV. Malformed lines are skipped instead of aborting the load.
df = pd.read_csv(RATINGS_CSV, encoding="latin-1", on_bad_lines='skip')

# Keep only meaningful ratings (rating value above 0).
df = df[df['Book-Rating'] > 0]

# Keep users with more than MIN_USER_RATINGS ratings.
user_counts = df['User-ID'].value_counts()
active_users = user_counts[user_counts > MIN_USER_RATINGS].index
df = df[df['User-ID'].isin(active_users)]

# Keep books with more than MIN_BOOK_RATINGS ratings. The counts are taken
# AFTER the user filter, so only ratings from active users are counted here;
# swapping the two filters would select a different set of books.
book_counts = df['ISBN'].value_counts()
popular_books = book_counts[book_counts > MIN_BOOK_RATINGS].index
df = df[df['ISBN'].isin(popular_books)]

# Fail fast with an explicit message: without any remaining rows there is no
# rating matrix to build and the similarity computation cannot succeed.
if df.empty:
    raise ValueError(
        f"No ratings left after filtering {RATINGS_CSV!r} "
        f"(users > {MIN_USER_RATINGS} ratings, books > {MIN_BOOK_RATINGS} ratings); "
        "lower the thresholds or check the input file."
    )

# Pivot table (User x ISBN). pivot_table aggregates duplicate (user, book)
# pairs with its default aggregation; missing user/book combinations are
# filled with 0.
pivot = df.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating').fillna(0)

# Similarity matrix: pairwise cosine similarity between the rows (users) of
# the pivot table, in the same order as pivot.index.
similarity = cosine_similarity(pivot)

# Save the model as a (pivot, similarity) tuple.
joblib.dump((pivot, similarity), MODEL_PATH)

print("✅ Modell erfolgreich gespeichert.")