# In [None]:
import pandas as pd
import numpy as np

# Read the ratings table (semicolon-separated; rows that fail to parse are skipped)
ratings = pd.read_csv(
    '/kaggle/input/bookcrossing-dataset/Ratings.csv',
    sep=';',
    on_bad_lines='skip',
)

# Reshape to one row per user and one column per ISBN; absent ratings become 0
# NOTE(review): this is a dense users x ISBNs table and may be very large — confirm it fits in memory
user_item_matrix = ratings.pivot(index='User-ID', columns='ISBN', values='Rating').fillna(0)

# Centre every user's row on that row's own mean (optional).
# The mean is taken after the zero fill, so unrated books count towards it.
user_item_matrix = user_item_matrix.sub(user_item_matrix.mean(axis=1), axis=0)
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity

# Create graph for a user based on their preferences
def create_user_graph(preferences):
    """Build an undirected co-preference graph for one user.

    Every position of ``preferences`` holding a strictly positive value is a
    candidate node, labelled by its index. Each pair of candidates is joined
    by an edge whose ``weight`` is the product of the two preference values.

    Args:
        preferences: Indexable sequence of numeric preference values.

    Returns:
        networkx.Graph: The weighted graph. Nodes are only created through
        edges, so the graph is empty when fewer than two values are positive.
    """
    from itertools import combinations

    # Select the positive entries once so the pairing cost depends on how many
    # items the user liked, not on the total number of items.
    liked = [idx for idx, value in enumerate(preferences) if value > 0]

    graph = nx.Graph()
    # The graph is undirected, so visiting each unordered pair once is enough.
    for i, j in combinations(liked, 2):
        graph.add_edge(i, j, weight=preferences[i] * preferences[j])
    return graph

# Compute Top-K similarities
def top_k_similarities(matrix, k=5):
    """Return, for every row of ``matrix``, the indices of its ``k`` most similar rows.

    Similarity is the cosine similarity between rows. No row is excluded from
    the ranking, so a row competes with itself as a candidate.

    Args:
        matrix: 2-D array-like accepted by ``cosine_similarity``.
        k: Number of column indices to keep per row.

    Returns:
        Integer array with one row per input row and at most ``k`` columns,
        ordered from most to least similar.
    """
    pairwise = cosine_similarity(matrix)
    # Sorting the negated scores ascending lists the highest similarity first.
    ranking = np.argsort(-pairwise, axis=1)
    return ranking[:, :k]

# Compute GNN-based similarity (placeholder function)
def compute_gnn_similarity(graph1, graph2, top_k_indices):
    """Return a placeholder similarity score between two graphs.

    Both graphs are expressed as adjacency matrices over one shared node list
    (the union of their nodes, in first-seen order starting with ``graph1``),
    and the score is the cosine similarity between the first row of each
    matrix. Sharing the node list keeps the two rows the same length and makes
    each column refer to the same node in both graphs.

    Because the compared row belongs to the first node of ``graph1`` (or of
    ``graph2`` when ``graph1`` is empty), the score can depend on argument
    order when the graphs do not start with the same node.

    Args:
        graph1: First networkx graph.
        graph2: Second networkx graph.
        top_k_indices: Accepted for interface compatibility; not used by this
            placeholder implementation.

    Returns:
        float: Cosine similarity of the two first adjacency rows, or ``0.0``
        when neither graph has any node.
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    nodelist = list(dict.fromkeys([*graph1.nodes, *graph2.nodes]))
    if not nodelist:
        # Nothing to compare; an empty adjacency matrix cannot be scored.
        return 0.0

    first_rows = []
    for graph in (graph1, graph2):
        # to_numpy_array rejects a nodelist containing unknown nodes, so pad a
        # copy with the other graph's nodes instead of mutating the input.
        padded = graph.copy()
        padded.add_nodes_from(nodelist)
        # to_numpy_array replaces to_numpy_matrix, which NetworkX 3.0 removed.
        adjacency = nx.to_numpy_array(padded, nodelist=nodelist)
        # Only entry [0, 0] of the pairwise result is needed, so keep row 0.
        first_rows.append(adjacency[:1])

    return float(cosine_similarity(first_rows[0], first_rows[1])[0, 0])
# Create graphs for all users
# One preference graph per row (user) of the user-item matrix
user_graphs = [create_user_graph(row) for row in user_item_matrix.values]

# The top-k table depends only on the full matrix, so compute it once instead
# of once per user pair. It is only needed when there is a pair to compare.
top_k_indices = (
    top_k_similarities(user_item_matrix.values, k=5)
    if len(user_graphs) > 1
    else None
)

# Compute pairwise similarities as (i, j, score) for every ordered pair i != j
similarities = []
for i, graph_i in enumerate(user_graphs):
    for j, graph_j in enumerate(user_graphs):
        if i == j:
            continue
        sim = compute_gnn_similarity(graph_i, graph_j, top_k_indices)
        similarities.append((i, j, sim))

# Create graph for clustering: one node per user, edges weighted by similarity
G = nx.Graph()
G.add_nodes_from(range(len(user_graphs)))
G.add_weighted_edges_from(similarities)

# Dominant Set Clustering
def _dominant_set_support(affinity, max_iter, tol, support_tol):
    """Return the row indices of one dominant set of ``affinity``.

    Runs discrete replicator dynamics from the uniform distribution and keeps
    the indices whose final weight exceeds ``support_tol`` times the largest
    weight.
    """
    size = affinity.shape[0]
    weights = np.full(size, 1.0 / size)
    for _ in range(max_iter):
        payoff = affinity @ weights
        cohesion = weights @ payoff
        if cohesion <= 0:
            # No positive affinity among these nodes, so no group is cohesive;
            # fall back to a single node as its own cluster.
            return [int(np.argmax(weights))]
        updated = weights * payoff / cohesion
        shift = np.abs(updated - weights).sum()
        weights = updated
        if shift < tol:
            break
    # Relative cutoff, so the result does not depend on how many nodes share the mass.
    cutoff = support_tol * weights.max()
    return [int(idx) for idx in np.flatnonzero(weights > cutoff)]


def dominant_set_clustering(graph, max_clusters=5, *, max_iter=1000, tol=1e-6,
                            support_tol=1e-3):
    """Peel clusters off a weighted graph as successive dominant sets.

    The previous implementation used ``nx.maximal_independent_set``, which
    returns a random set of mutually NON-adjacent nodes and ignores edge
    weights. Here each cluster is extracted with replicator dynamics on the
    edge-weight matrix of the not-yet-clustered nodes, then removed before
    the next round.

    Args:
        graph: networkx graph; the ``weight`` edge attribute is the affinity.
        max_clusters: Maximum number of clusters to extract.
        max_iter: Iteration cap for the replicator dynamics of each cluster.
        tol: L1 change between iterations below which the dynamics stop.
        support_tol: Fraction of the largest final weight a node must exceed
            to be counted as a member of the cluster.

    Returns:
        list[list]: Clusters as lists of node labels, in extraction order.
        Nodes left over once ``max_clusters`` is reached are not assigned.
    """
    clusters = []
    remaining = list(graph.nodes())
    while remaining and len(clusters) < max_clusters:
        affinity = nx.to_numpy_array(graph, nodelist=remaining, weight="weight")
        # Self-affinity must be zero, and the dynamics need non-negative weights.
        np.fill_diagonal(affinity, 0.0)
        np.clip(affinity, 0.0, None, out=affinity)

        members = _dominant_set_support(affinity, max_iter, tol, support_tol)
        cluster = [remaining[idx] for idx in members]
        clusters.append(cluster)

        chosen = set(cluster)
        remaining = [node for node in remaining if node not in chosen]
    return clusters

# Get clusters
clusters = dominant_set_clustering(G)
print("Clusters:", clusters)

# Replace user_item_matrix with a fresh zero-filled pivot of the ratings table.
# NOTE(review): this statement was indented in the original, which is an
# IndentationError at module level; it is dedented here so the file parses.
# Confirm the re-pivot is intended and not leftover notebook output.
user_item_matrix = ratings.pivot(index='User-ID', columns='ISBN', values='Rating').fillna(0)
