# Importing Packages

In [None]:
from collections import defaultdict

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import networkx as nx
from networkx_viewer import Viewer

#from model import spcall

%matplotlib inline

In [None]:
def draw(G, measures, measure_name):
  """Draw G with nodes sized and colored by a per-node measure.

  Args:
    G: NetworkX graph. Every edge must carry a 'weight' attribute;
      an edge without one raises KeyError.
    measures: Mapping of node -> numeric value (for example a centrality
      dictionary). Only the nodes listed in this mapping are drawn.
    measure_name: Text used as the plot title.
  """
  #* References:
  #* https://stackoverflow.com/a/52013202
  #* https://aksakalli.github.io/2017/07/17/network-centrality-measures-and-their-visualization.html
  #* https://www.datacamp.com/community/tutorials/social-network-analysis-python

  #* Split the edges by weight in a single pass:
  #* 'elarge' holds edges with weight greater than 5, 'esmall' holds the rest.
  elarge, esmall = [], []
  for u, v, d in G.edges(data=True):
    (elarge if d['weight'] > 5 else esmall).append((u, v))

  #* Generate a spring layout for the graph.
  pos = nx.spring_layout(G)

  #* Materialise the node list and its values once so that node order,
  #* node sizes and node colors are guaranteed to line up.
  nodelist = list(measures)
  values = list(measures.values())
  node_size = [v * 1000 for v in values]

  #* Draw the nodes with size and color determined by the measure values,
  #* colored with the 'plt.cm.plasma' color map.
  nodes = nx.draw_networkx_nodes(G, pos, nodelist=nodelist,
                                 node_size=node_size,
                                 node_color=values,
                                 cmap=plt.cm.plasma)

  #* Use a symmetric-log color normalization for the node colors.
  nodes.set_norm(mcolors.SymLogNorm(linthresh=0.01, linscale=1))

  #* Draw each edge exactly once, styled by its weight class.
  #* (Previously all edges were first drawn solid and then drawn again,
  #* which painted an opaque line underneath the dashed, translucent ones.)
  #* Heavy edges: width 2, default color, solid.
  #* Light edges: width 2, alpha 0.5, blue, dashed.
  nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2)
  nx.draw_networkx_edges(G, pos, edgelist=esmall, width=2, alpha=0.5, edge_color='blue', style='dashed')

  #* Add labels to the nodes: font size 10, blue, sans-serif.
  nx.draw_networkx_labels(G, pos, font_size=10, font_color='blue', font_family='sans-serif')

  #* Set the title, add a color bar for the node colors, hide the axis and show the plot.
  plt.title(measure_name)
  plt.colorbar(nodes)
  plt.axis('off')
  plt.show()


In [None]:
def plot_G(G, measures):
  """Plot G with measure-scaled nodes and weight-styled edges.

  Args:
    G: NetworkX graph whose edges all carry a 'weight' attribute
      (a missing weight raises KeyError).
    measures: Mapping of node -> numeric value; drives the size and the
      color of the first node layer.
  """
  #* Bucket the edges by weight: above 5 is "heavy", 5 or below is "light".
  heavy_edges = []
  light_edges = []
  for src, dst, attrs in G.edges(data=True):
    weight = attrs['weight']
    if weight > 5:
      heavy_edges.append((src, dst))
    elif weight <= 5:
      light_edges.append((src, dst))

  #* Spring layout: one coordinate per node of G.
  layout = nx.spring_layout(G)

  #* Node sizes are the measure values scaled by 1000.
  measure_values = list(measures.values())
  scaled_sizes = [value * 1000 for value in measure_values]

  #* First node layer: the nodes listed in 'measures', sized and colored by
  #* their value using the plasma color map.
  measure_layer = nx.draw_networkx_nodes(
      G, layout,
      node_size=scaled_sizes,
      cmap=plt.cm.plasma,
      node_color=measure_values,
      nodelist=measures.keys(),
  )

  #* Symmetric-log color normalization for the first node layer.
  sym_log = mcolors.SymLogNorm(linthresh=0.01, linscale=1)
  measure_layer.set_norm(sym_log)

  #* Second node layer: every node of G again, at a fixed size of 50.
  #* NOTE(review): no node_color is passed here, so the cmap argument
  #* likely has no visible effect -- confirm this overlay is intended.
  nx.draw_networkx_nodes(G, layout, node_size=50, cmap=plt.cm.plasma)

  #* Heavy edges: width 2, solid. Light edges: width 2, alpha 0.5, blue, dashed.
  nx.draw_networkx_edges(G, layout, edgelist=heavy_edges, width=2)
  nx.draw_networkx_edges(
      G, layout,
      edgelist=light_edges,
      width=2,
      alpha=0.5,
      edge_color='blue',
      style='dashed',
  )

  #* Node labels: font size 10, black, sans-serif.
  nx.draw_networkx_labels(
      G, layout,
      font_size=10,
      font_color='black',
      font_family='sans-serif',
  )

  #* Hide the axis and show the figure.
  plt.axis('off')
  plt.show()

In [None]:
def plot_Gp(G, measures, output_path="network.png"):
  """Render a large, high-resolution plot of G and save it to disk.

  Two node layers are drawn: one sized/colored by 'measures', and a second
  one of small fixed-size markers colored by a category derived from each
  node's label (see _node_label_color).

  Args:
    G: NetworkX graph with string node labels. Every edge must carry a
      'weight' attribute (a missing weight raises KeyError).
    measures: Mapping of node -> numeric value; drives the size and color
      of the first node layer.
    output_path: File the figure is saved to. Defaults to "network.png",
      the path that used to be hard-coded.
  """
  #* Large canvas: 50 x 50 inches at 300 dpi.
  plt.figure(figsize=(50, 50), dpi=300)

  #* Split the edges by weight in a single pass:
  #* 'elarge' holds edges with weight greater than 5, 'esmall' holds the rest.
  elarge, esmall = [], []
  for u, v, d in G.edges(data=True):
    (elarge if d['weight'] > 5 else esmall).append((u, v))

  #* Spring layout with a fixed seed, 13 iterations and a scale of 300.
  pos = nx.spring_layout(G, iterations=13, scale=300, seed=1234)

  #* Materialise nodes and values once so order, size and color line up.
  nodelist = list(measures)
  values = list(measures.values())
  node_size = [v * 1000 for v in values]

  #* First node layer: sized and colored by the measure (plasma color map).
  nodes = nx.draw_networkx_nodes(G, pos, nodelist=nodelist,
                                 node_size=node_size,
                                 node_color=values,
                                 cmap=plt.cm.plasma)

  #* Symmetric-log color normalization for the first node layer.
  nodes.set_norm(mcolors.SymLogNorm(linthresh=0.01, linscale=1))

  #* One color per node of G, in iteration order. Every node gets a color
  #* (unrecognised ones get a fallback), so the list always matches the
  #* number of nodes drawn below.
  color_map = [_node_label_color(node) for node in G]

  #* Second node layer: fixed size 10, colored by label category.
  nx.draw_networkx_nodes(G, pos, node_size=10, node_color=color_map, cmap=plt.cm.plasma)

  #* Heavy edges: width 1, gray, solid.
  #* Light edges: width 1, alpha 0.5, gray, dashed.
  nx.draw_networkx_edges(G, pos, edgelist=elarge, edge_color='gray', width=1)
  nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.5, edge_color='gray', style='dashed')

  #* Node labels: font size 3, black, sans-serif.
  nx.draw_networkx_labels(G, pos, font_size=3, font_color='black', font_family='sans-serif')

  #* Hide the axis.
  plt.axis('off')

  #* Save the figure at 300 dpi, then display it.
  plt.savefig(output_path, dpi=300)
  plt.show()


#* Ordered (substrings, color) rules: the first rule with any substring
#* contained in the node label wins.
_NODE_COLOR_RULES = (
  (('fac',), 'blue'),
  (('stu', '94fbd'), 'green'),
  (('adm',), 'yellow'),
  (('sub',), 'orange'),
  (('isp',), 'red'),
  (('bis',), 'purple'),
  (('par',), 'black'),
  (('sup',), 'aqua'),
)

#* Labels matched exactly (after the substring rules) and drawn in white.
_WHITE_NODES = ('grade', 'assignment', 'Bulletin Board', 're-assign',
                'enroll', 'register', 'transfer', 'drop')

#* Color for nodes that match no rule.
_UNKNOWN_NODE_COLOR = 'gray'


def _node_label_color(node):
  """Return the marker color for a node based on its label."""
  for needles, color in _NODE_COLOR_RULES:
    if any(needle in node for needle in needles):
      return color
  if node in _WHITE_NODES:
    return 'white'
  #* Report the unrecognised label, but still return a color so the
  #* caller's color list stays aligned with the node list.
  print(node)
  return _UNKNOWN_NODE_COLOR


In [None]:
def plot_G2(G):
  """Draw G with a spring layout and gray node labels.

  Args:
    G: NetworkX graph to draw.
  """
  #* Generate a spring layout for the graph.
  pos = nx.spring_layout(G)

  #* Draw nodes and edges at the layout computed above. Passing 'pos'
  #* keeps them aligned with the labels drawn below; the built-in labels
  #* are turned off so each label is drawn only once.
  nx.draw_networkx(G, pos=pos, with_labels=False)

  #* Add labels to the nodes: font size 10, gray, sans-serif.
  nx.draw_networkx_labels(G, pos, font_size=10, font_color='gray', font_family='sans-serif')

  #* Turn off the axis and display the plot.
  plt.axis('off')
  plt.show()

In [None]:
def writeandrender(filename, centrality, graph=None):
  """Write a ranked centrality measure to a text file and draw it.

  Args:
    filename: Base name of the output file; the file written is
      filename + centrality["prefix"] + ".txt".
    centrality: Dictionary with the keys
      "measure"   - mapping of node -> value,
      "name"      - title passed to draw(),
      "prefix"    - suffix appended to 'filename',
      "overwrite" - truthy to overwrite the file, falsy to append to it.
    graph: Graph to draw. Defaults to the module-level G_weighted, which
      is what this function always used before the parameter existed.
  """
  #* Extract the measure from the centrality dictionary.
  cmeasure = centrality["measure"]

  #* Rank the (node, value) pairs from highest to lowest value.
  sorted_x = sorted(cmeasure.items(), key=lambda kv: kv[1], reverse=True)

  #* 'w' replaces any existing content, 'a' adds to it.
  mode = 'w' if centrality["overwrite"] else 'a'

  #* The context manager closes the file even if the write fails.
  with open(filename + centrality["prefix"] + ".txt", mode) as f:
    f.write(str(sorted_x))

  #* Draw the graph with the measure and the name from the centrality dictionary.
  target = G_weighted if graph is None else graph
  draw(target, cmeasure, centrality["name"])

# Anonymizer for Agent to Task

In [None]:
# import csv
# import hashlib
# from collections import defaultdict
# import pandas as pd
# import networkx as nx

# def anonymize_id(id):
#     prefix = id[0]
#     year = id[1:5]
#     hash_object = hashlib.sha1(id.encode())
#     hex_dig = hash_object.hexdigest()
#     if prefix == 'A':
#         return 'adm' + hex_dig[:5]
#     elif prefix == 'F':
#         return 'fac' + hex_dig[:5]
#     elif prefix == 'S':
#         return 'stu' + hex_dig[:5]
#     elif prefix == 'P':
#         return 'par' + hex_dig[:5]
#     else:
#         return 'unk' + hex_dig[:5]

# def process_file(filename, is_comment=False):
#     with open(filename + ".csv", 'r') as f:
#         reader = csv.reader(f)
#         next(reader)

#         interactions = defaultdict(int)
#         rows = []
#         for row in reader:
#             initiatorid = anonymize_id(row[0])
#             receiverid = anonymize_id(row[2])  # Assuming 'receiverid' is the third column in your CSV file
#             # Skip the records that are prefixed with "unk"
#             if initiatorid.startswith('unk') or receiverid.startswith('unk'):
#                 continue
#             interactions[(initiatorid, receiverid)] += 1
#             if is_comment:  # If it's a comment, transform the commenter and use it as an intermediary node
#                 commenter = 'commenter_' + anonymize_id(row[1])
#                 rows.append([initiatorid, commenter, receiverid, row[3]])  # Assuming 'ts' is the fourth column in your CSV file
#             else:
#                 tltype = row[1] if filename != "reactions" else row[1]
#                 rows.append([initiatorid, tltype, receiverid, row[3]])  # Assuming 'ts' is the fourth column in your CSV file

#     # Create a DataFrame from the rows
#     df = pd.DataFrame(rows, columns=['initiatorid', 'tltype', 'receiverid', 'ts'])  # Add 'ts' to your DataFrame

#     # Convert the 'ts' column to datetime format
#     df['ts'] = pd.to_datetime(df['ts'])

#     # Define the date range
#     start_date = '2021-05-01'
#     end_date = '2021-05-31'

#     # Filter rows based on the date range
#     df = df[(df['ts'] >= start_date) & (df['ts'] <= end_date)]

#     # Check if any records prefixed with "unk" exist in the DataFrame
#     unk_records = df[(df['initiatorid'].str.startswith('unk')) | (df['receiverid'].str.startswith('unk'))]

#     if len(unk_records) > 0:
#         print("Warning: There are records prefixed with 'unk' in the DataFrame.")
#     else:
#         print("No records prefixed with 'unk' were found in the DataFrame.")

#     # Add a 'weight' column to the DataFrame
#     df['weight'] = df.apply(lambda row: interactions[(row['initiatorid'], row['receiverid'])], axis=1)

#     return df

# # Process each file and concatenate the results into one DataFrame
# df_timeline = process_file("timeline")
# df_reactions = process_file("reactions")
# df_comments = process_file("comments", is_comment=True)

# df_all = pd.concat([df_timeline, df_reactions, df_comments])

# # Create a new weighted graph.
# G_weighted = nx.Graph()

# # For each row in the DataFrame, add edges to the graph.
# # The nodes are the 'initiatorid', 'tltype', and 'receiverid' columns of the row.
# # The weight of the edge is the 'weight' column of the row.
# for index, row in df_all.iterrows():
#     G_weighted.add_edge(row['initiatorid'], row['tltype'], weight=row['weight'])
#     G_weighted.add_edge(row['tltype'], row['receiverid'], weight=row['weight'])

In [None]:
#* Generate a graphml file: 
#nx.write_graphml_lxml(G_weighted, 'timeline_react_comments_agent_to_tasks_network_may.graphml')

# Process the raw CSV file 

In [None]:
# import csv
# import hashlib
# from collections import defaultdict
# import pandas as pd
# import networkx as nx

# def anonymize_id(id):
#     prefix = id[0]
#     year = id[1:5]
#     hash_object = hashlib.sha1(id.encode())
#     hex_dig = hash_object.hexdigest()
#     if prefix == 'A':
#         return 'adm' + hex_dig[:5]
#     elif prefix == 'F':
#         return 'fac' + hex_dig[:5]
#     elif prefix == 'S':
#         return 'stu' + hex_dig[:5]
#     elif prefix == 'P':
#         return 'par' + hex_dig[:5]
#     else:
#         return 'unk' + hex_dig[:5]

# filename = input("Filename:")
# with open(filename + ".csv", 'r') as f:
#     reader = csv.reader(f)
#     next(reader)

#     interactions = defaultdict(int)
#     rows = []
#     for row in reader:
#         initiatorid = anonymize_id(row[0])
#         receiverid = anonymize_id(row[1])
#         tltype = row[2]
#         interactions[(initiatorid, tltype)] += 1
#         interactions[(tltype, receiverid)] += 1
#         rows.append([initiatorid, tltype, receiverid])

# # Create a DataFrame from the rows
# df = pd.DataFrame(rows, columns=['initiatorid', 'tltype', 'receiverid'])

# # Add a 'weight' column to the DataFrame
# df['weight'] = df.apply(lambda row: interactions[(row['initiatorid'], row['tltype'])] + interactions[(row['tltype'], row['receiverid'])], axis=1)

# # Now 'df' is a DataFrame that contains your preprocessed data.
# initiatorid = df['initiatorid']
# tltype = df['tltype']
# receiverid = df['receiverid']
# weight = df['weight']

# # Create a new weighted graph.
# G_weighted = nx.Graph()

# # For each row in the DataFrame, add edges to the graph.
# # The nodes are the 'initiatorid', 'tltype', and 'receiverid' columns of the row.
# # The weight of the edge is the 'weight' column of the row.
# for index, row in df.iterrows():
#     G_weighted.add_edge(row['initiatorid'], row['tltype'], weight=row['weight'])
#     G_weighted.add_edge(row['tltype'], row['receiverid'], weight=row['weight'])

# Visualize the Network

In [None]:
#* Plot the graph using the plot_Gp function.
#plot_Gp(G_weighted, measures=nx.degree_centrality(G_weighted))

In [None]:
#* Plot the graph using the plot_Gp function.
#plot_Gp(G_weighted, measures=nx.betweenness_centrality(G_weighted))

In [None]:
#* Plot the graph using the plot_Gp function.
#plot_Gp(G_weighted, measures=nx.eigenvector_centrality(G_weighted))

In [None]:
#* Generate a graphml version of this graph: 


# Basic Topological Attributes

In [None]:
#* Print the weighted graph.
#* NOTE(review): G_weighted is only assigned inside the commented-out
#* preprocessing cells above; one of them has to be uncommented and run
#* first, otherwise this line raises NameError.
print(G_weighted)

# Centrality Measures 

## Degree Centrality 

In [None]:
#* Calculate the degree centrality of the graph.
# degree = nx.degree_centrality(G_weighted)

In [None]:
#* Draw the graph with node sizes proportional to their degree centrality.
# draw(G_weighted, nx.degree_centrality(G_weighted), 'Degree Centrality')

In [None]:
#* Sort the nodes by their degree centrality and print the sorted list.
# x = degree
# sorted_x = sorted(x.items(), key=lambda kv: kv[1], reverse=True)
# for item in sorted_x:
#     print(item)

In [None]:
#* Write the degree centrality to a file and render it.
# writeandrender(
#     filename, {
#         "measure": degree,
#         "name": "Degree Centrality",
#         "prefix": "degree",
#         "overwrite": True
# })

## Eigenvector Centrality 

In [None]:
#* Calculate the eigenvector centrality of the graph.
# eigenvector = nx.eigenvector_centrality(G_weighted, max_iter=1000)

In [None]:
#* Draw the graph with node sizes proportional to their eigenvector centrality.
# draw(G_weighted, eigenvector, 'Eigenvector Centrality')

In [None]:
#* Sort the nodes by their eigenvector centrality and print the sorted list.
# x = eigenvector
# sorted_x = sorted(x.items(), key=lambda kv: kv[1], reverse=True)
# for item in sorted_x:
#     print(item)

In [None]:
#* Write the eigenvector centrality to a file.
# with open(filename + "eigen.txt", 'a') as f:
#     f.write(str(eigenvector))

In [None]:
#* Write the eigenvector centrality to a file and render it.
# writeandrender(filename,
#     {
#         "measure":eigenvector,
#         "name": "Eigenvector Centrality",
#         "prefix":"eigen",
#         "overwrite": True
#     }
# )

## Betweenness Centrality 

In [None]:
#* Calculate the betweenness centrality of the graph.
# betweenness = nx.betweenness_centrality(G_weighted)

In [None]:
#* Draw the graph with node sizes proportional to their betweenness centrality.
# draw(G_weighted, betweenness, 'Betweenness Centrality')

In [None]:
#* Sort the nodes by their betweenness centrality and print the sorted list.
# x = nx.betweenness_centrality(G_weighted)
# sorted_x = sorted(x.items(), key=lambda kv: kv[1], reverse=True)
# for item in sorted_x:
#     print(item)

In [None]:
#* Write the betweenness centrality to a file and render it.
# writeandrender(
#   filename, {
#       "measure": betweenness,
#       "name": 'Betweenness Centrality',
#       "prefix": "bet",
#       "overwrite": True
#   }
# )