In [1]:
import pandas as pd
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import defaultdict
import numpy as np

# Load the clustering results; the code below reads the "cluster" and
# "filename" columns of this table.
df = pd.read_csv("clustered_papers.csv")
input_folder = "abstract_intro"

# Map cluster id -> list of the raw texts of the files assigned to it.
cluster_texts = defaultdict(list)

# Drop noise points (cluster label -1) in one vectorized step instead of
# testing every row inside the loop.
clustered = df[df["cluster"] != -1]

# Walk only the two columns that are needed; this avoids materializing a
# Series per row the way DataFrame.iterrows() does.
for cluster_id, filename in zip(clustered["cluster"], clustered["filename"]):
    with open(os.path.join(input_folder, filename), "r", encoding="utf-8") as f:
        text = f.read()
    # The file is already closed here; only the in-memory text is kept.
    cluster_texts[cluster_id].append(text)

# Compute TF-IDF keywords per cluster
top_k = 10  # number of keywords kept for each cluster
cluster_keywords = {}  # cluster id -> list of its top_k terms, highest score first

for cluster_id, texts in cluster_texts.items():
    # A separate vectorizer is fitted on each cluster's documents, with English
    # stop words removed and the vocabulary capped at 1000 terms.
    # NOTE(review): because the fit only sees this cluster's texts, the IDF
    # weights are relative to the cluster rather than to the whole corpus —
    # confirm that this is the intended notion of "keyword".
    vec = TfidfVectorizer(stop_words="english", max_features=1000)
    X = vec.fit_transform(texts)

    # Average each term's TF-IDF weight over the documents of the cluster.
    scores = np.asarray(X.mean(axis=0)).flatten()

    # Fetch the vocabulary once rather than once per selected term.
    feature_names = vec.get_feature_names_out()

    # Indices of the top_k highest mean scores, in descending order.
    top_indices = scores.argsort()[::-1][:top_k]
    top_terms = [feature_names[i] for i in top_indices]
    cluster_keywords[cluster_id] = top_terms

# Show the keywords of every cluster, one comma-separated line per cluster.
for cid in cluster_keywords:
    keywords = cluster_keywords[cid]
    joined = ", ".join(keywords)
    print(f"Cluster {cid}: {joined}")


Cluster 7: quantum, distributed, computing, protocol, protocols, verification, process, program, processes, classical
Cluster 4: quantum, learning, neural, propose, machine, classical, networks, network, control, systems
Cluster 2: quan, tum, quantum, university, dissertation, thesis, proof, adolescent, dumachev, aghamohammadi
Cluster 9: quantum, university, geometric, phase, spin, physics, states, state, adiabatic, study
Cluster 17: code, codes, quantum, surface, error, qubits, qubit, logical, correction, fault
Cluster 6: quantum, error, gravity, theory, field, black, holographic, correction, model, hole
Cluster 30: quantum, codes, error, correcting, code, correction, errors, stabilizer, qubit, classical
Cluster 31: quantum, error, codes, correction, code, channel, correcting, errors, information, channels
Cluster 13: quantum, teleportation, entanglement, cloning, communication, states, state, information, propose, protocol
Cluster 0: quantum, group, groups, graphs, algebras, theory, 

In [3]:
! pip install streamlit






In [None]:
import streamlit as st
import pandas as pd
import os

# NOTE(review): the recorded cell output shows Streamlit asking to be started
# with `streamlit run <script>`; this code looks intended to live in a
# standalone script rather than a notebook cell — confirm.

# Read the clustering results; each row supplies a "filename" and its "cluster".
df = pd.read_csv("clustered_papers.csv")
input_folder = "abstract_intro"

# Sidebar: pick which clusters to display (every cluster is preselected).
st.sidebar.title("Cluster Filter")
cluster_ids = sorted(df["cluster"].unique())
selected = st.sidebar.multiselect(
    "Select Cluster(s)",
    cluster_ids,
    default=cluster_ids,
)

# Main page: heading plus the number of clusters currently chosen.
st.title("Quantum Papers Cluster Viewer")
st.write(f"Showing files from {len(selected)} clusters.")

# Keep only the rows whose cluster is selected, then render one section per
# file: heading, file contents, cluster caption, horizontal rule.
in_selection = df["cluster"].isin(selected)
filtered = df[in_selection]
for filename, cluster in zip(filtered["filename"], filtered["cluster"]):
    st.markdown(f"###  {filename}")
    path = os.path.join(input_folder, filename)
    with open(path, "r", encoding="utf-8") as f:
        contents = f.read()
    st.write(contents)
    st.caption(f"Cluster: {cluster}")
    st.markdown("---")


2025-05-29 10:48:59.508 
  command:

    streamlit run C:\Users\learn\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]
