# In [None]:
import pandas as pd
import numpy as np
import scipy.spatial.distance as ssd
import networkx as nx

# Function to calculate the Jensen-Shannon distance
def jensen_shannon_distance(p, q):
    """Return the Jensen-Shannon distance between two distributions.

    Args:
        p: Array-like of non-negative weights for the first distribution.
        q: Array-like of non-negative weights for the second distribution,
            with the same length as ``p``.

    Returns:
        The Jensen-Shannon distance, i.e. the square root of the
        Jensen-Shannon divergence, as computed by
        ``scipy.spatial.distance.jensenshannon`` with its default
        (natural) logarithm base.
    """
    # scipy's jensenshannon() already yields the distance
    # sqrt(0.5 * (KL(p || m) + KL(q || m))) with m = (p + q) / 2.  It must be
    # applied to (p, q) directly; feeding it (p, m) and (q, m) and combining
    # the results as if they were KL divergences gives a different quantity.
    return ssd.jensenshannon(p, q)

# Function to calculate document similarity
def calculate_document_similarity(doc1, doc2, threshold=0.05):
    """Decide whether two documents have near-identical word distributions.

    Each document is lower-cased and split on whitespace. The word counts
    are converted to relative frequencies over the combined vocabulary and
    the two frequency vectors are compared with ``jensen_shannon_distance``.

    Args:
        doc1: Text of the first document.
        doc2: Text of the second document.
        threshold: Largest distance that still counts as similar.

    Returns:
        ``True`` when the distance is at most ``threshold``, else ``False``.
        Two documents without any words are treated as having distance 0;
        a document without words is never similar to one that has words.
    """
    tokens1 = doc1.lower().split()
    tokens2 = doc2.lower().split()

    # A document with no tokens has no word distribution, and dividing the
    # counts by its length would raise ZeroDivisionError, so these cases are
    # settled before any frequencies are computed.
    if not tokens1 and not tokens2:
        return bool(0.0 <= threshold)
    if not tokens1 or not tokens2:
        return False

    counts1 = pd.Series(tokens1).value_counts()
    counts2 = pd.Series(tokens2).value_counts()

    # Align both count vectors on the shared vocabulary; words missing from
    # one document get a count of zero there.
    vocabulary = counts1.index.union(counts2.index)
    p = counts1.reindex(vocabulary, fill_value=0).to_numpy(dtype=float)
    q = counts2.reindex(vocabulary, fill_value=0).to_numpy(dtype=float)
    p /= len(tokens1)
    q /= len(tokens2)

    return bool(jensen_shannon_distance(p, q) <= threshold)

# Function to create a graph from a list of documents
def create_document_graph(docs, threshold=0.05):
    """Build an undirected similarity graph over a list of documents.

    Node ``i`` stands for ``docs[i]``. An edge joins ``i`` and ``j``
    (``i < j``) whenever ``calculate_document_similarity`` reports the two
    documents as similar for the given ``threshold``.

    Args:
        docs: Sequence of document strings.
        threshold: Forwarded to ``calculate_document_similarity``.

    Returns:
        A ``networkx.Graph`` with one node per document.
    """
    doc_count = len(docs)
    graph = nx.Graph()

    # Every document gets a node, even if it ends up with no neighbours.
    graph.add_nodes_from(range(doc_count))

    # Each unordered pair is compared exactly once.
    graph.add_edges_from(
        (first, second)
        for first in range(doc_count)
        for second in range(first + 1, doc_count)
        if calculate_document_similarity(
            docs[first], docs[second], threshold=threshold
        )
    )

    return graph

# Function to cluster documents using the Louvain method
def cluster_documents_louvain(docs, threshold=0.05, seed=None):
    """Cluster documents with Louvain community detection.

    A similarity graph is built with ``create_document_graph`` and then
    partitioned with ``nx.community.louvain_communities``.

    Args:
        docs: Sequence of document strings.
        threshold: Forwarded to ``create_document_graph``.
        seed: Optional random seed forwarded to ``louvain_communities``.
            Louvain is randomised, so pass a seed for reproducible clusters.

    Returns:
        A list of clusters ordered from largest to smallest; each cluster is
        a list of indices into ``docs``.
    """
    G = create_document_graph(docs, threshold=threshold)

    # With no edges there is nothing to optimise: every document is its own
    # cluster.  NOTE(review): this also sidesteps modularity code dividing by
    # the edge count, which presumably fails on edgeless graphs in some
    # NetworkX releases -- confirm against the installed version.
    if G.number_of_edges() == 0:
        return [[node] for node in G]

    # greedy_modularity_communities is the Clauset-Newman-Moore algorithm,
    # not Louvain, so the dedicated Louvain routine is used here
    # (available in NetworkX 2.7 and later).
    communities = nx.community.louvain_communities(G, seed=seed)

    # Largest-first ordering, as greedy_modularity_communities produced.
    return [list(members) for members in sorted(communities, key=len, reverse=True)]

# Function to cluster documents using the Girvan-Newman method
def cluster_documents_girvan_newman(docs, threshold=0.05):
    """Cluster documents with the Girvan-Newman method.

    A similarity graph is built with ``create_document_graph`` and every
    partition produced by ``nx.community.girvan_newman`` is collected.

    Args:
        docs: Sequence of document strings.
        threshold: Forwarded to ``create_document_graph``.

    Returns:
        A list with one entry per partition, in the order they are produced.
        Each entry is a list of clusters, and each cluster is a list of node
        indices. Partitions holding fewer than two communities are skipped.
    """
    graph = create_document_graph(docs, threshold=threshold)
    levels = nx.community.girvan_newman(graph)

    # Keep every level that actually splits the graph into several groups.
    return [
        [list(community) for community in level]
        for level in levels
        if len(level) > 1
    ]
