# Generic Graph Explorer Template
This notebook provides a template for exploring and analyzing graph data using Python libraries such as NetworkX and pandas. Replace the data loading and filtering steps with your own data sources and requirements.

In [1]:
import networkx as nx
import pandas as pd
import numpy as np
from pyvis.network import Network
import os
import glob
import datetime 
import matplotlib.pyplot as plt
import scipy as sp
from PIL import Image
import io
import pyperclip

## Load and Prepare Data
Replace the file loading and filtering logic below with your own data source and preprocessing steps.

In [2]:
# Example: Load CSV files matching a pattern (update the pattern to your data location)
files = glob.glob(os.path.join(os.getcwd(), 'path/to/your/data/**/*.csv'), recursive=True)
df = pd.DataFrame(columns=['source', 'target', 'url', 'created_at', 'title'])

for file in files:
    print(file)
    temp_df = pd.read_csv(file)
    temp_df['title'] = temp_df['title'].replace(np.nan, '[No Title]')
    df = pd.concat([df, temp_df], ignore_index=True)

df.drop_duplicates(inplace=True)
df['target'] = df['target'].str.split('#').str[0]
df["created_at"] = pd.to_datetime(df["created_at"], errors='coerce')
df = df[df['created_at'].notnull()]
df['Start'] = df.created_at.apply(lambda x: x.isoformat())
df['End'] = df.created_at.apply(lambda t: t+pd.Timedelta(weeks=1)).apply(lambda x: x.isoformat())

# Example filters (customize as needed)
df = df[df['title'].str.contains('standup', case=False) == False]
df = df[df['title'].str.startswith('Standup ') == False]

df.to_csv("alldata.csv")

## Build Graph
Create a NetworkX graph from the dataframe. Adjust the column names as needed for your data.

In [3]:
G = nx.from_pandas_edgelist(df, source='source', target='target')
for node in G.nodes():
    G.nodes()[node]['Label'] = str(node)
    G.nodes()[node]['Start'] = df[df.source == node].Start.min().split("T")[0] if not df[df.source == node].empty else ''
    G.nodes()[node]['End'] = df[df.source == node].End.max().split("T")[0] if not df[df.source == node].empty else ''

nx.write_gml(G, "all_dynamic.gml")

In [4]:
G = nx.from_pandas_edgelist(df, source='source', target='target', edge_attr='created_at')
node_types = []
for node in G.nodes():
    # Example: Assign colors/sizes based on node type or other logic
    G.nodes()[node]['color'] = 'green'
    G.nodes()[node]['size'] = 20
    node_types.append(node)

In [7]:
span = 14
iterations = (max(df['created_at'])  - min(df['created_at']) ).days // span
results = []
evc_results = []
for iteration in range(iterations):
    delta = datetime.timedelta(days=iteration * span)
    start =  min(df['created_at']) + delta
    end =  min(df['created_at']) + datetime.timedelta(days=span) + delta
    SG = nx.from_pandas_edgelist(df[(df['created_at']>start) & (df['created_at']<end)] , source='source', target='target') 
    results.append([start,  SG.number_of_edges(), nx.average_node_connectivity(SG), nx.density(SG), SG.number_of_nodes()])
    print(start, end)
    evc = nx.eigenvector_centrality(SG, max_iter=10000)
    evc_results.append([start] +[ evc.get(node, 0) for node in node_types ])

stats = pd.DataFrame(results, columns=['start_date', 'edges', 'mean_node_connectivity', 'density', "nodes"])
evc_results_df = pd.DataFrame(evc_results ,columns=['start_date'] + node_types)
evc_results_df.replace(0, np.nan)
evc_results_df["mean"] = evc_results_df[node_types].median(axis=1)

ValueError: max() iterable argument is empty

In [6]:
stats

NameError: name 'stats' is not defined

In [None]:
stats.to_clipboard()
stats

In [None]:
# Plotting
fig, ax1 = plt.subplots(figsize=(14, 8))

# Plot Mean Node Connectivity
color = 'tab:red'
ax1.set_xlabel('Date')
ax1.set_ylabel('Mean Node Connectivity', color=color)
ax1.plot(stats['start_date'], stats['mean_node_connectivity'], color=color, marker='o', linestyle='-', linewidth=2, markersize=8, label='Mean Node Connectivity')
ax1.tick_params(axis='y', labelcolor=color)

# Create a second y-axis for edges and density
ax2 = ax1.twinx()  
ax2.set_ylabel('Nodes', color='tab:blue')
ax2.plot(stats['start_date'], stats['nodes'], color='tab:blue', marker='s', linestyle='--', linewidth=2, markersize=8, label='Nodes')
ax2.tick_params(axis='y', labelcolor='tab:blue')

# Create a third y-axis for density
ax3 = ax1.twinx()  
ax3.spines['right'].set_position(('outward', 60))
ax3.set_ylabel('Density', color='tab:green')
ax3.plot(stats['start_date'], stats['density'], color='tab:green', marker='^', linestyle='-.', linewidth=2, markersize=8, label='Density')
ax3.tick_params(axis='y', labelcolor='tab:green')

# Adding legends
lines = ax1.get_lines() + ax2.get_lines() + ax3.get_lines()
labels = [line.get_label() for line in lines]
ax1.legend(lines, labels, loc='upper left')

# Add grid, title and format x-axis
ax1.grid(True)
fig.suptitle('Edges, Mean Node Connectivity, and Density over Time')
fig.tight_layout()

# Save the plot as an image
image_path = 'plot_image.png'
plt.savefig(image_path)

plt.show()

# Open the saved image
with Image.open('plot_image.png') as img:
    buffer = io.BytesIO()
    img.save(buffer, format='PNG')
    image_data = buffer.getvalue()

# Copy the image data to the clipboard
pyperclip.copy(image_data)

In [None]:
# Example: Plotting various statistics
to_plot = ['mean_node_connectivity', 'edges', 'density']
for col in to_plot:
    stats.plot(x="start_date", y=col)
    plt.xticks(rotation=45)

evcs = [col for col in evc_results_df.columns if col not in ['start_date', 'mean']]
evcs = evcs[:5]  # Plot only first 5 for clarity
for node in evcs:
    evc_results_df.plot(x="start_date", y=node)
    plt.xticks(rotation=45)

In [None]:
evcs = [col for col in evc_results_df.columns if col not in ['start_date', 'mean']]
evcs = evcs[:5]
evcs_df = evc_results_df[evcs]
evcs_df.plot(kind='line', figsize=(14, 8))  

## Team Work (Optional)
You can add logic here to color or size nodes based on team membership or other attributes relevant to your data.

In [None]:
# Example: Visualize a subgraph
graph_to_visualize = G  # or SG, or any subgraph
graph_net = Network(height="750px", width="100%", bgcolor="#222222", font_color="white", notebook=True)
graph_net.from_nx(graph_to_visualize)
graph_net.show_buttons(filter_=['physics'])
graph_net.save_graph('team-graph.html')

## Centrality Measures
Calculate and display centrality measures for your graph.

In [None]:
closeness_centrality = list(sorted(nx.closeness_centrality(G).items(), key=lambda item: item[1], reverse=True))
print("Top nodes by closeness centrality:")
for item in closeness_centrality[:10]:
    print(item[0], item[1])

betweenness_centrality = list(sorted(nx.betweenness_centrality(G).items(), key=lambda item: item[1], reverse=True))
print("Top nodes by betweenness centrality:")
for item in betweenness_centrality[:10]:
    print(item[0], item[1])

In [None]:
print("Number of edges", G.number_of_edges())
print("Average Node Connectivity", nx.average_node_connectivity(G))
print("Density", nx.density(G))

In [None]:
nx.write_gexf(G, "all_dynamic.gexf")