In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [None]:
file_path = 'CRIPS_DM.txt'
# Read the whole corpus inside a context manager so the file handle is
# closed as soon as the text is in memory (a bare open() left it open for
# the lifetime of the kernel).
with open(file_path, "r") as f:
    text = f.read()
sent_text = nltk.sent_tokenize(text) # this gives us a list of sentences

In [None]:
import itertools

def extract_nouns(sentence:str) -> list:
    """Pair up the nouns that occur together in ``sentence``.

    The sentence is tokenized and POS-tagged with NLTK, and every token whose
    tag starts with ``NN`` is kept as a noun. Each 2-combination of those
    nouns (in order of appearance) becomes one ``[noun_a, noun_b, sentence]``
    entry.

    Args:
        sentence: The sentence to analyse.

    Returns:
        A list of ``[noun_a, noun_b, sentence]`` lists; empty when the
        sentence contains fewer than two nouns.
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(sentence))
    nouns = [token for token, tag in tagged_tokens if tag.startswith('NN')]

    # Attach the originating sentence to every noun pair.
    return [
        [first, second, sentence]
        for first, second in itertools.combinations(nouns, 2)
    ]

# Flatten the per-sentence results into a single list of
# [noun, noun, sentence] entries covering the whole text.
list_all_text = [
    noun_pair
    for sentence in sent_text
    for noun_pair in extract_nouns(sentence)
]

list_all_text

In [None]:
import pandas as pd

# Name the three columns at construction time. Without this an empty
# `list_all_text` produces a frame with zero columns, and any later attempt
# to assign three column labels to it fails with a length mismatch.
df = pd.DataFrame(list_all_text, columns=["node_1", "node_2", "edge"])
df

In [None]:
# Label the columns as an edge list: the two endpoint nouns and the
# sentence that links them.
edge_list_columns = ["node_1", "node_2", "edge"]
df.columns = edge_list_columns
df

In [None]:
import networkx as nx
G = nx.Graph()

## Add nodes to the graph
# Iterating a DataFrame yields its column labels, not its rows, so looping
# over `df` itself would register "node_1", "node_2" and "edge" as stray
# isolated nodes. Register the values of the two endpoint columns instead.
for endpoint_column in ("node_1", "node_2"):
    G.add_nodes_from(map(str, df[endpoint_column]))

## Add edges to the graph
# One edge per extracted noun pair, carrying the source sentence as its
# title. G is a simple undirected graph, so a pair that occurs again updates
# the attributes of the existing edge rather than adding a parallel one.
for node_1, node_2, sentence in zip(df["node_1"], df["node_2"], df["edge"]):
    G.add_edge(
        str(node_1),
        str(node_2),
        title=sentence,
        weight=1)

In [None]:
# Girvan-Newman yields successively finer partitions of the graph.
communities_generator = nx.community.girvan_newman(G)
top_level_communities = next(communities_generator)
# The generator can be exhausted after a single level (e.g. a graph with at
# most one edge); fall back to the top level instead of raising StopIteration.
next_level_communities = next(communities_generator, top_level_communities)
# Sort members within each community and the communities themselves so the
# printed result is stable.
communities = sorted(map(sorted, next_level_communities))
print("Number of Communities = ", len(communities))
print(communities)

In [None]:
import seaborn as sns
import random
palette = "hls"

## Now add these colors to communities and make another dataframe
def colors2Community(communities) -> pd.DataFrame:
    """Assign one color and one group number to every community.

    A seaborn palette named by the module-level ``palette`` is generated with
    one color per community and shuffled. Communities are numbered from 1 in
    the order given, and each takes a color from the end of the shuffled list.

    Args:
        communities: Sequence of communities, each an iterable of node labels.

    Returns:
        A DataFrame with one row per node and the columns ``node``,
        ``color`` (hex string) and ``group``.
    """
    hex_colors = sns.color_palette(palette, len(communities)).as_hex()
    random.shuffle(hex_colors)

    records = []
    for group, community in enumerate(communities, start=1):
        color = hex_colors.pop()
        records.extend(
            {"node": node, "color": color, "group": group} for node in community
        )
    return pd.DataFrame(records)


# Build the per-node color/group table for the detected communities.
colors = colors2Community(communities)
colors

In [None]:
# Copy each node's community group and color onto the graph, and size the
# node by its degree.
for entry in colors.itertuples(index=False):
    G.nodes[entry.node].update(
        group=entry.group,
        color=entry.color,
        size=G.degree[entry.node],
    )

In [None]:
from pathlib import Path

from pyvis.network import Network

# Despite its name this is the path of the output HTML *file*.
graph_output_directory = "./docs/index.html"
# Make sure the target folder exists before the HTML is written; otherwise
# writing into a missing ./docs folder fails.
Path(graph_output_directory).parent.mkdir(parents=True, exist_ok=True)

net = Network(
    notebook=False,
    # bgcolor="#1a1a1a",
    cdn_resources="remote",
    height="900px",
    width="100%",
    select_menu=True,
    # font_color="#cccccc",
    filter_menu=False,
)

# Import nodes, edges and their attributes from the networkx graph.
net.from_nx(G)
# net.repulsion(node_distance=150, spring_length=400)
net.force_atlas_2based(central_gravity=0.015, gravity=-31)
# net.barnes_hut(gravity=-18100, central_gravity=5.05, spring_length=380)
# Expose only the physics controls in the rendered page.
net.show_buttons(filter_=["physics"])

# NOTE(review): the signature of show() differs between pyvis releases
# (some accept notebook=False) - confirm against the installed version.
net.show(graph_output_directory) #notebook=False)