In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
# Fixed alias: a stray "from" had been fused onto the name, binding pyplot to `pltfrom`
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE


In [None]:
# Load the topics/subtopics CSV that the encoding and clustering cells work on
df = pd.read_csv(filepath_or_buffer='topics_subtopics_abridged.csv')

In [None]:
# One-hot encode every column: each distinct value of a column becomes its own
# indicator column, named "<column>_<value>"
df_encoded = pd.get_dummies(df, columns=df.columns, prefix_sep="_")

# Embed the encoded rows into two dimensions with t-SNE (fixed seed)
tsne = TSNE(random_state=42, n_components=2)
reduced = tsne.fit_transform(X=df_encoded)


In [None]:
from sklearn.cluster import KMeans

n_clusters = 20  # Adjust this based on your preference

# Cluster on the one-hot features only. Dropping any existing 'cluster' column
# keeps this cell idempotent: without it, re-running the cell would feed the
# previous labels back in as an extra feature.
cluster_features = df_encoded.drop(columns=['cluster'], errors='ignore')

# Fixed seed (same value the t-SNE step uses) so cluster labels are reproducible
kmeans = KMeans(n_clusters=n_clusters, random_state=42)

# The labels are stored on df_encoded because the plotting cell reads them from there
df_encoded['cluster'] = kmeans.fit_predict(cluster_features)


In [None]:
import plotly.express as px

# Create a DataFrame for the reduced data
df_reduced = pd.DataFrame(reduced, columns=['x', 'y'])

# Hover text: the non-null values of each source row joined with ' | '.
# Values go through str() first because str.join raises TypeError on any
# non-string cell (for example a numeric column).
df_reduced['details'] = df.apply(
    lambda row: ' | '.join(map(str, row.dropna())),
    axis=1,
)

# Cluster labels come from the KMeans cell, which stores them on df_encoded
df_reduced['cluster'] = df_encoded['cluster']

fig = px.scatter(df_reduced, x='x', y='y', color='cluster', hover_data=['details'], title="Interactive Visualization of Topics")
fig.show()


In [None]:
# Convert the CSV to a hierarchy for the treemap
import pandas as pd

df = pd.read_csv('hierarchical_data.csv')

# The root has one child per column; each column node has one leaf per
# non-null cell of that column
hierarchy = {
    "name": "root",
    "children": [
        {
            "name": column,
            "children": [
                {"name": entry} for entry in df[column] if pd.notna(entry)
            ],
        }
        for column in df.columns
    ],
}


In [None]:
# Flatten the hierarchy into [parent, name] rows for Plotly
data = []

def flatten_hierarchy(node, parent_name=''):
    """Append one [parent, name] row per node to the module-level `data` list.

    Nodes are visited depth-first, parents before children and children in
    their listed order. A node without a 'children' key always produces a row;
    a node with a 'children' key produces a row only when `parent_name` is
    non-empty, so the top-level call with the default argument emits no row
    for the root itself.

    Args:
        node: dict with a 'name' key and, for inner nodes, a 'children' list.
        parent_name: name recorded as the parent of `node`.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped (and therefore emitted) in their original order.
    pending = [(node, parent_name)]
    while pending:
        current, parent = pending.pop()
        label = current['name']

        if 'children' not in current:
            data.append([parent, label])
            continue

        if parent:
            data.append([parent, label])
        pending.extend(
            (child, label) for child in reversed(list(current['children']))
        )
        
# Walk the whole tree from the root; the rows accumulate in `data`
flatten_hierarchy(node=hierarchy)

# Two-column frame of (parent, name) pairs consumed by the treemap cell
df_plotly = pd.DataFrame(data=data, columns=['parent', 'name'])


In [None]:
import plotly.express as px

# Draw the treemap from the flattened (parent, name) rows.
# NOTE(review): `path` reads 'parent' and 'name' as two hierarchy levels, so
# the rows whose parent is 'root' presumably show up as their own top-level
# branch next to the topics - confirm this is the intended layout.
fig = px.treemap(data_frame=df_plotly, path=['parent', 'name'])
fig.show()


In [None]:
import plotly.offline as offline

# Write the current `fig` to treemap.html; auto_open=False stops Plotly from
# opening the file in a browser afterwards
offline.plot(fig, filename='treemap.html', auto_open=False)


In [None]:
# Topics as nodes and subtopics as edges, using broken data.
# Use as a model for the network graph with researchers.
import networkx as nx
from pyvis.network import Network

df = pd.read_csv(filepath_or_buffer='network_graph.csv')


In [None]:
# Initialize dictionary and graph
topics_data = {}

G = nx.Graph()

# Every column header is a main-topic node; each non-null cell of that column
# is a subtopic node joined to the header by an edge
for main_topic in df.columns:
    G.add_node(main_topic)
    G.add_edges_from(
        (main_topic, subtopic) for subtopic in df[main_topic].dropna()
    )

# Hand the graph to pyvis and render it to graph.html
nt = Network(cdn_resources='remote', notebook=True)
nt.from_nx(G)
nt.show('graph.html')

In [None]:
# Make it look pretty with colors (no edge weights are set in this cell)

G = nx.Graph()

# Column headers are main topics; non-null cells are their subtopics
for column in df.columns:
    main_topic = column
    G.add_node(main_topic)

    for subtopic in df[column].dropna():
        G.add_node(subtopic)
        G.add_edge(main_topic, subtopic)

# Coloring edges
# An undirected edge can be reported with either endpoint first: a subtopic
# shared by two topics is yielded as (subtopic, later_topic). So the subtopic
# end is identified explicitly instead of assuming it is the second element,
# and colors are keyed by the unordered endpoint pair.
main_topics = set(df.columns)
degrees = G.degree()
edge_colors = {}
for u, v in G.edges():
    subtopic_node = v if u in main_topics else u
    if degrees[subtopic_node] > 1:
        # Subtopic connects with multiple main topics
        edge_colors[frozenset((u, v))] = 'red'
    else:
        # Default color
        edge_colors[frozenset((u, v))] = 'blue'

nt = Network(notebook=True, cdn_resources='in_line')
nt.from_nx(G)
for edge in nt.edges:
    # Lookup is orientation-independent, whatever order pyvis stored the ends in
    edge['color'] = edge_colors[frozenset((edge['from'], edge['to']))]

nt.show('graph_color.html')

In [None]:
# Now a sunburst!

import plotly.graph_objects as go

df = pd.read_csv(filepath_or_buffer='topics_subtopics_burst.csv')

# Map each column header (main topic) to the list of its non-null cells (subtopics)
topics_data = {column: df[column].dropna().tolist() for column in df.columns}



In [None]:
# Three parallel lists for the sunburst: entry i of each describes the same sector
labels, values, parents = [], [], []

In [None]:
# Start from empty lists so that re-running this cell rebuilds the data
# instead of appending a second copy of every sector
labels, values, parents = [], [], []

for main_topic, subtopics in topics_data.items():
    labels.append(main_topic)
    values.append(0)  # Main topic itself doesn't have a "value" in this context
    parents.append('')  # Main topics don't have parents

    # One sector of value 1 per subtopic, attached to its main topic
    labels.extend(subtopics)
    values.extend([1] * len(subtopics))
    parents.extend([main_topic] * len(subtopics))


In [None]:
import plotly.graph_objects as go
import plotly.io as pio

# Plotly resolves `parents` against `ids`, falling back to `labels` when no ids
# are given, and those identifiers must be unique. A subtopic name that occurs
# more than once (or that equals a main-topic name) would otherwise break the
# hierarchy. Main topics (empty parent) keep their label as id so the existing
# `parents` entries still point at them; every subtopic gets an id made unique
# by its parent and its position in the lists.
ids = [
    label if parent == '' else f"{parent}/{label}/{position}"
    for position, (label, parent) in enumerate(zip(labels, parents))
]

# Creating the Sunburst chart
fig = go.Figure(go.Sunburst(
    ids=ids,
    labels=labels,
    values=values,
    parents=parents,
    maxdepth=2  # Only two levels: main topic and subtopic
))

# Sets the default renderer for the session, so figures open in a browser tab
pio.renderers.default = 'browser'
fig.show()

fig.write_html("sunburst_chart.html")

In [None]:
# Sanity check: show the first 10 and then the last 10 entries of each list
for window in (slice(None, 10), slice(-10, None)):
    print(labels[window])
    print(values[window])
    print(parents[window])