# Importing Packages

In [431]:
import matplotlib.pyplot as plt
from networkx_viewer import Viewer
import matplotlib.colors as mcolors
from collections import defaultdict

#from model import spcall

%matplotlib inline

In [432]:
def draw(G, measures, measure_name):
  #* https://stackoverflow.com/a/52013202
  #* https://aksakalli.github.io/2017/07/17/network-centrality-measures-and-their-visualization.html
  #* https://www.datacamp.com/community/tutorials/social-network-analysis-python

  #* Create two lists of edges based on their weight.
  #* 'elarge' contains edges with weight greater than 5.
  #* 'esmall' contains edges with weight less than or equal to 5.
  elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 5]
  esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 5]

  #* Generate a spring layout for the graph.
  pos = nx.spring_layout(G)

  #* Set the size of each node based on its corresponding measure value.
  node_size = [v * 1000 for v in measures.values()]

  #* Draw the nodes of the graph with their size and color determined by the measure values.
  #* The color map 'plt.cm.plasma' is used for coloring the nodes.
  nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size, 
                                  cmap=plt.cm.plasma,
                                  node_color=list(measures.values()),
                                  nodelist=measures.keys())

  #* Set the color normalization of the nodes to be logarithmic.
  nodes.set_norm(mcolors.SymLogNorm(linthresh=0.01, linscale=1))
  
  #* Draw the edges of the graph.
  edges = nx.draw_networkx_edges(G, pos)
  
  #* Draw the 'elarge' and 'esmall' edges with different styles.
  #* The 'elarge' edges are drawn with a width of 2.
  #* The 'esmall' edges are drawn with a width of 2, transparency of 0.5, blue color, and dashed style.
  nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2)
  nx.draw_networkx_edges(G, pos, edgelist=esmall, width=2, alpha=0.5, edge_color='blue', style='dashed')

  #* Add labels to the nodes with a font size of 10, blue color, and sans-serif font family.
  nx.draw_networkx_labels(G, pos, font_size=10, font_color='blue', font_family='sans-serif')
  
  #* Set the title of the plot, add a color bar, turn off the axis, and display the plot.
  plt.title(measure_name)
  plt.colorbar(nodes)
  plt.axis('off')
  plt.show()


In [433]:
def plot_G(G, measures):
  #* Define two lists of edges based on their weight.
  #* 'elarge' contains edges with weight greater than 5.
  #* 'esmall' contains edges with weight less than or equal to 5.
  elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 5]
  esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 5]

  #* Generate a spring layout for the graph.
  #* This layout treats edges as springs holding nodes close, while treating nodes as repelling objects.
  pos = nx.spring_layout(G)
  
  #* Set the size of each node based on its corresponding measure value.
  #* The size is multiplied by 1000 for better visibility.
  node_size = [v * 1000 for v in measures.values()]

  #* Draw the nodes of the graph with their size and color determined by the measure values.
  #* The color map 'plt.cm.plasma' is used for coloring the nodes.
  nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size,
                                cmap=plt.cm.plasma,
                                node_color=list(measures.values()),
                                nodelist=measures.keys())

  #* Set the color normalization of the nodes to be logarithmic.
  #* This can be useful if the measure values vary widely.
  nodes.set_norm(mcolors.SymLogNorm(linthresh=0.01, linscale=1))

  #* Draw the nodes of the graph again with a fixed size of 50 and a color map of 'plt.cm.plasma'.
  nx.draw_networkx_nodes(G, pos, node_size=50, cmap=plt.cm.plasma)

  #* Draw the 'elarge' and 'esmall' edges with different styles.
  #* The 'elarge' edges are drawn with a width of 2.
  #* The 'esmall' edges are drawn with a width of 2, transparency of 0.5, blue color, and dashed style.
  nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2)
  nx.draw_networkx_edges(G, pos, edgelist=esmall, width=2, alpha=0.5, edge_color='blue', style='dashed')

  #* Add labels to the nodes with a font size of 10, black color, and sans-serif font family.
  nx.draw_networkx_labels(G ,pos, font_size=10, font_color='black', font_family='sans-serif')

  #* Turn off the axis and display the plot.
  plt.axis('off')
  plt.show()

In [434]:
def plot_Gp(G, measures):
  # Set the figure size to make the plot high-definition.
  plt.figure(figsize=(50, 50), dpi=300)

  #* Define two lists of edges based on their weight.
  #* 'elarge' contains edges with weight greater than 5.
  #* 'esmall' contains edges with weight less than or equal to 5.
  elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 5]
  esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 5]

  #* Generate a spring layout for the graph.
  #* This layout treats edges as springs holding nodes close, while treating nodes as repelling objects.
  pos = nx.spring_layout(G, iterations=13, scale=300, seed=1234)

  #* Set the size of each node based on its corresponding measure value.
  #* The size is multiplied by 1000 for better visibility.
  node_size = [v * 1000 for v in measures.values()]

  #* Draw the nodes of the graph with their size and color determined by the measure values.
  #* The color map 'plt.cm.plasma' is used for coloring the nodes.
  nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size, cmap=plt.cm.plasma, 
                                  node_color=list(measures.values()),
                                  nodelist=measures.keys())

  #* Set the color normalization of the nodes to be logarithmic.
  #* This can be useful if the measure values vary widely.
  nodes.set_norm(mcolors.SymLogNorm(linthresh=0.01, linscale=1))

  #* Create a color map based on the node labels.
  #* Different labels are mapped to different colors.
  color_map = []
  for node in G:
    if 'fac' in node:
        color_map.append('blue')
    elif 'stu' in node:
        color_map.append('green')
    elif 'adm' in node:
        color_map.append('yellow')
    elif 'sub' in node:
        color_map.append('orange')
    elif 'isp' in node:
        color_map.append('red')
    elif 'bis' in node:
        color_map.append('purple')
    elif 'par' in node:
        color_map.append('black')
    elif 'sup' in node:
        color_map.append('aqua')
    elif node in ['grade', 'assignment', 'Bulletin Board', 're-assign', 'enroll', 'register', 'transfer', 'drop']:
        color_map.append('gray')  # white for the specific nodes
    else:
        print (node)

  #* Draw the nodes of the graph again with a fixed size of 10 and a color map based on the node labels.
  nx.draw_networkx_nodes(G, pos, node_size=10, node_color=color_map, cmap=plt.cm.plasma)

  #* Draw the 'elarge' and 'esmall' edges with different styles.
  #* The 'elarge' edges are drawn with a width of 2 and gray color.
  #* The 'esmall' edges are drawn with a width of 2, transparency of 0.5, gray color, and dashed style.
  nx.draw_networkx_edges(G, pos, edgelist=elarge, edge_color='gray', width=1)
  nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.5, edge_color='gray', style='dashed')

  #* Add labels to the nodes with a font size of 10, gray color, and sans-serif font family.
  nx.draw_networkx_labels(G, pos, font_size=3, font_color='black', font_family='sans-serif')

  #* Turn off the axis and display the plot.
  plt.axis('off')

  #* Increase the DPI to 300 for a high-quality plot.
  plt.savefig("network.png", dpi=300)
  
  #* Display the plot 
  plt.show()


In [435]:
def plot_G2(G):
  #* Generate a spring layout for the graph.
  pos = nx.spring_layout(G)

  #* Draw the graph using NetworkX's built-in draw function.
  nx.draw_networkx(G)

  #* Add labels to the nodes with a font size of 10, gray color, and sans-serif font family.
  nx.draw_networkx_labels(G, pos, font_size=10, font_color='gray', font_family='sans-serif')

  #* Turn off the axis and display the plot.
  plt.axis('off')
  plt.show()

In [436]:
def writeandrender(filename, centrality):
  #* Extract the measure from the centrality dictionary.
  cmeasure = centrality["measure"]
  
  #* Sort the items in the measure dictionary in descending order based on their values.
  sorted_x = sorted(cmeasure.items(), key=lambda kv: kv[1], reverse=True)

  #* Check the 'overwrite' flag in the centrality dictionary.
  #* If it's True, open the file in write mode, which overwrites the existing content.
  #* If it's False, open the file in append mode, which adds to the existing content.
  if centrality["overwrite"]:
      f = open(filename + centrality["prefix"] + ".txt", 'w')
  else:
      f = open(filename + centrality["prefix"] + ".txt", 'a')
  
  #* Write the sorted items to the file.
  f.write(str(sorted_x))
  
  #* Close the file.
  f.close()

  #* Draw the weighted graph with the measure and the name from the centrality dictionary.
  draw(G_weighted, centrality["measure"], centrality["name"])

# Data Cleaning

##### Removing the generic users from the dataset: "7505d64a54e061b7acd54ccd58b49dc43500b635"

In [437]:
import pandas as pd

# Load the data into a pandas DataFrame
df = pd.read_csv('C:/Users/boyma/OneDrive/Desktop/SNA_code/agent_to_tasks/raw_data.csv')

# Print the number of rows before removing
print(f"Number of rows before removing: {len(df)}")

# Remove rows where 'receiverid' is "7505d64a54e061b7acd54ccd58b49dc43500b635"
df = df[df['receiverid'] != "7505d64a54e061b7acd54ccd58b49dc43500b635"]

# Print the number of rows after removing
print(f"Number of rows after removing: {len(df)}")


Number of rows before removing: 49493
Number of rows after removing: 46920


# Data Transformation 
- Transform the data from "reactions" column into their string equivalents in a new column called "emoji"
- Transform the data from "roomid" column into a new column called "room_name". For every new data, append the string "room_" to the beginning of the "roomid". For example, if the "roomid" is "a0dc6db1830d89519e8f", then the new column will be "room_a0dc6db". 
- Add a new column called "commented" where a string "commented" is added when the "commenter" column is not empty, otherwise, leave it as is. 



In [438]:
# Define a mapping from integers to emoji names
reaction_mapping = {
    1: 'like',
    2: 'happy',
    3: 'surprise',
    4: 'sad',
    5: 'angry'
}

# Create a new column 'emoji' by mapping the 'reaction' column to the corresponding emoji names
df['emoji'] = df['reaction'].map(reaction_mapping)

# Create a new column 'room_name' by appending 'room_' to the first 7 characters of the 'roomid' column
df['room_name'] = 'room_' + df['roomid'].str.slice(0, 7)

# Create a new column 'commented' where its data is derived from the 'commenter' column
df['commented'] = df['commenter'].apply(lambda x: 'commented' if pd.notnull(x) else '')


### Export to a new csv file to check whether the transformation happens properly

In [439]:
# Write the DataFrame to a CSV file
df.to_csv('new_data.csv', index=False)


# Anonymize the users

In [440]:
import hashlib
import pandas as pd

def anonymize_id(id):
    if pd.isnull(id):
        return ''
    prefix = id[0]
    hash_object = hashlib.sha1(id.encode())
    hex_dig = hash_object.hexdigest()
    if prefix == 'A':
        return 'adm' + hex_dig[:5]
    elif prefix == 'F':
        return 'fac' + hex_dig[:5]
    elif prefix == 'S':
        return 'stu' + hex_dig[:5]
    elif prefix == 'P':
        return 'par' + hex_dig[:5]
    else:
        return 'unk' + hex_dig[:5]

# Create a dictionary to store the original IDs and their corresponding anonymized IDs
anonymized_ids = {}

# Get all unique IDs in the 'initiatorid', 'receiverid', 'reactor', and 'commenter' columns
unique_ids = pd.concat([df['initiatorid'], df['receiverid'], df['reactor'], df['commenter']]).dropna().unique()

# Create a mapping from the original IDs to the hashed IDs
for id in unique_ids:
    anonymized_ids[id] = anonymize_id(id)

# Replace the original IDs with the hashed IDs in the 'initiatorid', 'receiverid', 'reactor', and 'commenter' columns
for column in ['initiatorid', 'receiverid', 'reactor', 'commenter']:
    df[column] = df[column].map(anonymized_ids)


### Export to a new csv to check whether the anonymization is successful

In [441]:
# Write the DataFrame to a CSV file
df.to_csv('anon_data.csv', index=False)


# Start Mapping The Interactions

In [442]:
import networkx as nx

# Create a directed graph
G = nx.DiGraph()

# Iterate over the rows of the dataframe
for index, row in df.iterrows():
    # Check if the IDs are not empty or null
    if pd.notnull(row['initiatorid']) and pd.notnull(row['receiverid']):
        # For Timeline Interactions
        G.add_edge(row['initiatorid'], row['tltype'])
        G.add_edge(row['tltype'], row['receiverid'])

        # For Reactions
        if pd.notnull(row['reactor']):
            if row['reactor'] == row['initiatorid']:
                # Add edges from reactor to reaction and from reaction to receiver
                G.add_edge(row['reactor'], row['emoji'])
                G.add_edge(row['emoji'], row['receiverid'])
            elif row['reactor'] == row['receiverid']:
                # Add edges from reactor to reaction and from reaction to initiator
                G.add_edge(row['reactor'], row['emoji'])
                G.add_edge(row['emoji'], row['initiatorid'])

        # For Comments
        if pd.notnull(row['commenter']):
            G.add_edge(row['commenter'], row['commented'])
            G.add_edge(row['commented'], row['initiatorid'])

        # For OnlineClass
        G.add_edge(row['initiatorid'], row['room_name'])


# Add the 'color' attribute to the nodes based on the node labels
color_map = {}
for node in G.nodes:
    node_str = str(node)
    if 'fac' in node_str:
        color_map[node] = 'blue'
    elif 'stu' in node_str:
        color_map[node] = 'green'
    elif 'adm' in node_str:
        color_map[node] = 'yellow'
    elif 'sub' in node_str:
        color_map[node] = 'orange'
    elif 'isp' in node_str:
        color_map[node] = 'red'
    elif 'bis' in node_str:
        color_map[node] = 'purple'
    elif 'par' in node_str:
        color_map[node] = 'black'
    elif 'sup' in node_str:
        color_map[node] = 'aqua'
    elif node_str in ['assignment', 'attendance', 'Bulletin Board', 'collaborate', 'enroll', 'event', 'grade', 're-assign', 'register', 'like', 'happy', 'surprise', 'sad','angry', 'room_', 'commented']:
        color_map[node] = 'gray'
    else:
        print(node_str)

nx.set_node_attributes(G, color_map, 'color')

# Add the 'weight' attribute to the edges
for u, v, data in G.edges(data=True):
    data['weight'] = data.get('weight', 1) + 1

nan
room_5896e22
room_07a0beb
room_45d026b
room_7eb680a
room_a0dc6db
room_73fc6b9
room_21ad9f3
room_3814351
room_9cebe5e
room_09e404c
room_bbc5def
room_6705f3c
room_fa4aa54
room_43a3584
room_5912cff
room_9c852f0
room_7711bfe
room_2d57440
room_51aac1d
room_ee69103
room_9528233
room_7ba0b2d
room_d7a297d
room_b68c148
room_c9540e9
room_c97d3de
room_5384ea4
room_a4ae8a2
room_c7b41e1
room_1d36d69
room_8496ef0
room_21357a9
room_01a8a99
room_adbc112
room_7f4db2e
room_c8bbfe9
room_39aee10
room_675a7e3
room_8d6af15


In [443]:
# Export the graph to a GraphML file
nx.write_graphml_lxml(G, 'learning_interactions.graphml')

# Timeline Table

In [444]:
# import pandas as pd
# import networkx as nx
# import hashlib

# # Read the CSV file into a DataFrame
# df = pd.read_csv('C:/Users/boyma/OneDrive/Desktop/SNA_code/agent_to_tasks/timeline_dataset.csv')

# def anonymize_id(id):
#     if pd.isnull(id):
#         return 'unknown'
#     prefix = id[0]
#     hash_object = hashlib.sha1(id.encode())
#     hex_dig = hash_object.hexdigest()
#     if prefix == 'A':
#         return 'adm' + hex_dig[:5]
#     elif prefix == 'F':
#         return 'fac' + hex_dig[:5]
#     elif prefix == 'S':
#         return 'stu' + hex_dig[:5]
#     elif prefix == 'P':
#         return 'par' + hex_dig[:5]
#     else:
#         return 'unk' + hex_dig[:5]

# # Anonymize all the rows or records from the columns: initiatorid, receiverid
# df['initiatorid'] = df['initiatorid'].apply(anonymize_id)
# df['receiverid'] = df['receiverid'].apply(anonymize_id)


# # Create a new directed graph
# G = nx.DiGraph()

# # Define a color map based on the node labels
# color_map = {
#     'fac': 'blue',
#     'stu': 'green',
#     'adm': 'yellow',
#     'sub': 'orange',
#     'isp': 'red',
#     'bis': 'purple',
#     'par': 'black',
#     'sup': 'aqua',
#     'assignment': 'white',
#     'attendance': 'white',
#     'Bulletin Board': 'white',
#     'collaborate': 'white',
#     'enroll': 'white',
#     'event': 'white',
#     'grade': 'white',
#     're-assign': 'white',
#     'register': 'white'
# }

# # Iterate over the DataFrame
# for index, row in df.iterrows():
#     G.add_edge(row['initiatorid'], row['tltype'])
#     G.add_edge(row['tltype'], row['receiverid'])

#     # Add the color attribute to the nodes
#     for node in [row['initiatorid'], row['tltype'], row['receiverid']]:
#         for key in color_map:
#             if key in node:
#                 nx.set_node_attributes(G, {node: color_map[key]}, 'colour')

In [445]:
# nx.write_graphml_lxml(G, 'timeline_interactions.graphml')

# Reactions Table


In [446]:
# import pandas as pd
# import networkx as nx
# import hashlib

# # Step 1: Import the dataset
# df = pd.read_csv('C:/Users/boyma/OneDrive/Desktop/SNA_code/agent_to_tasks/reactions_dataset.csv')

# # Step 2: Anonymize the initiatorid, receiverid, reactor
# def anonymize_id(id):
#     if pd.isnull(id):
#         return 'unknown'
#     prefix = id[0]
#     hash_object = hashlib.sha1(id.encode())
#     hex_dig = hash_object.hexdigest()
#     if prefix == 'A':
#         return 'adm' + hex_dig[:5]
#     elif prefix == 'F':
#         return 'fac' + hex_dig[:5]
#     elif prefix == 'S':
#         return 'stu' + hex_dig[:5]
#     elif prefix == 'P':
#         return 'par' + hex_dig[:5]
#     else:
#         return 'unk' + hex_dig[:5]

# df['initiatorid'] = df['initiatorid'].apply(anonymize_id)
# df['receiverid'] = df['receiverid'].apply(anonymize_id)

# # Anonymize reactor based on the presence in initiatorid or receiverid
# df['reactor'] = df.apply(lambda row: row['initiatorid'] if row['reactor'] == row['initiatorid'] else (row['receiverid'] if row['reactor'] == row['receiverid'] else anonymize_id(row['reactor'])), axis=1)

# # Step 3: Generate a Digraph
# G = nx.DiGraph()

# # Define a color map based on the node labels
# color_map = {
#     'fac': 'blue',
#     'stu': 'green',
#     'adm': 'yellow',
#     'sub': 'orange',
#     'isp': 'red',
#     'bis': 'purple',
#     'par': 'black',
#     'sup': 'aqua',
#     'assignment': 'white',
#     'attendance': 'white',
#     'Bulletin Board': 'white',
#     'collaborate': 'white',
#     'enroll': 'white',
#     'event': 'white',
#     'grade': 'white',
#     're-assign': 'white',
#     'register': 'white'
# }

# # Step 4: Map their interaction
# for index, row in df.iterrows():
#     # For Reactions
#     if row['reactor'] == row['initiatorid']:
#         # Add edges from reactor to reaction and from reaction to receiver
#         G.add_edge(row['reactor'], row['reaction'])
#         G.add_edge(row['reaction'], row['receiverid'])
#     elif row['reactor'] == row['receiverid']:
#         # Add edges from reactor to reaction and from reaction to initiator
#         G.add_edge(row['reactor'], row['reaction'])
#         G.add_edge(row['reaction'], row['initiatorid'])

#     # Add the color attribute to the nodes
#     for node in [row['initiatorid'], row['reaction'], row['receiverid']]:
#         for key in color_map:
#             if key in node:
#                 nx.set_node_attributes(G, {node: color_map[key]}, 'colour')

In [447]:
# nx.write_graphml_lxml(G, 'reactions_interactions.graphml')

# Learning Interactions

In [448]:
# import pandas as pd
# import networkx as nx
# import matplotlib.pyplot as plt
# import hashlib

# def anonymize_id(id):
#     if pd.isnull(id):
#         return 'unknown'
#     prefix = id[0]
#     hash_object = hashlib.sha1(id.encode())
#     hex_dig = hash_object.hexdigest()
#     if prefix == 'A':
#         return 'adm' + hex_dig[:5]
#     elif prefix == 'F':
#         return 'fac' + hex_dig[:5]
#     elif prefix == 'S':
#         return 'stu' + hex_dig[:5]
#     elif prefix == 'P':
#         return 'par' + hex_dig[:5]
#     else:
#         return 'unk' + hex_dig[:5]

# # Import the dataset into a DataFrame
# df = pd.read_csv('C:/Users/boyma/OneDrive/Desktop/SNA_code/agent_to_tasks/learning_interaction_dataset.csv')

# # Drop all records that has this value from the receiverid column
# df = df[df['receiverid'] != "7505d64a54e061b7acd54ccd58b49dc43500b635"]

# # Anonymize all the rows or records from the columns: initiatorid, receiverid
# df['initiatorid'] = df['initiatorid'].apply(anonymize_id)
# df['receiverid'] = df['receiverid'].apply(anonymize_id)
# df['reactor'] = df['reactor'].apply(anonymize_id)

# # After the anonymization, transform all records from the reaction column into "reaction"
# df['reaction'] = 'reaction'

# # Also transform all data from the commenter column into "comment"
# df['commenter'] = 'comment'

# # Also, transform all the data from the tltype into "onlineclass" except for specific records
# df.loc[~df['tltype'].isin(['assignment', 'attendance', 'Bulletin Board', 'collaborate', 'enroll', 'event', 'grade', 're-assign', 'register']), 'tltype'] = 'onlineclass'

# # Create a Directed Graph out of the new dataframe
# G = nx.DiGraph()

# # Define a color map based on the node labels
# color_map = {
#     'fac': 'blue',
#     'stu': 'green',
#     'adm': 'yellow',
#     'sub': 'orange',
#     'isp': 'red',
#     'bis': 'purple',
#     'par': 'black',
#     'sup': 'aqua',
#     'assignment': 'white',
#     'attendance': 'white',
#     'Bulletin Board': 'white',
#     'collaborate': 'white',
#     'enroll': 'white',
#     'event': 'white',
#     'grade': 'white',
#     're-assign': 'white',
#     'register': 'white',
#     'reaction': 'white',
#     'comment': 'white',
#     'onlineclass': 'white'
# }

# # Add edges to the graph based on the interactions and apply the color mapping
# for index, row in df.iterrows():
#     # For Timeline Interactions
#     G.add_edge(row['initiatorid'], row['tltype'])
#     G.add_edge(row['tltype'], row['receiverid'])

#     # For Reactions
#     if row['reactor'] == row['initiatorid']:
#         # Add edges from reactor to reaction and from reaction to receiver
#         G.add_edge(row['reactor'], row['reaction'])
#         G.add_edge(row['reaction'], row['receiverid'])
#     elif row['reactor'] == row['receiverid']:
#         # Add edges from reactor to reaction and from reaction to initiator
#         G.add_edge(row['reactor'], row['reaction'])
#         G.add_edge(row['reaction'], row['initiatorid'])

#     # For Comments
#     G.add_edge(row['receiverid'], row['commenter'])
#     G.add_edge(row['commenter'], row['initiatorid'])

#     # For OnlineClass
#     G.add_edge(row['initiatorid'], row['tltype'])

#     # Add the color attribute to the nodes
#     for node in [row['initiatorid'], row['tltype'], row['receiverid']]:
#         for key in color_map:
#             if key in node:
#                 nx.set_node_attributes(G, {node: color_map[key]}, 'colour')

# # Draw the graph
# color_values = [data['colour'] for node, data in G.nodes(data=True)]
# nx.draw(G, node_color=color_values, with_labels=True)

In [449]:
# Write DataFrame to CSV
# df.to_csv('C:/Users/boyma/OneDrive/Desktop/SNA_code/agent_to_tasks/new_dataframe.csv', index=False)

In [450]:
# nx.write_graphml_lxml(G, 'learning_interactions.graphml')

# Process the raw CSV file 

In [451]:
# import csv
# import hashlib
# from collections import defaultdict
# import pandas as pd
# import networkx as nx

# def anonymize_id(id):
#     prefix = id[0]
#     year = id[1:5]
#     hash_object = hashlib.sha1(id.encode())
#     hex_dig = hash_object.hexdigest()
#     if prefix == 'A':
#         return 'adm' + hex_dig[:5]
#     elif prefix == 'F':
#         return 'fac' + hex_dig[:5]
#     elif prefix == 'S':
#         return 'stu' + hex_dig[:5]
#     elif prefix == 'P':
#         return 'par' + hex_dig[:5]
#     else:
#         return 'unk' + hex_dig[:5]

# filename = input("Filename:")
# with open(filename + ".csv", 'r') as f:
#     reader = csv.reader(f)
#     next(reader)

#     interactions = defaultdict(int)
#     rows = []
#     for row in reader:
#         initiatorid = anonymize_id(row[0])
#         receiverid = anonymize_id(row[1])
#         tltype = row[2]
#         interactions[(initiatorid, tltype)] += 1
#         interactions[(tltype, receiverid)] += 1
#         rows.append([initiatorid, tltype, receiverid])

# # Create a DataFrame from the rows
# df = pd.DataFrame(rows, columns=['initiatorid', 'tltype', 'receiverid'])

# # Add a 'weight' column to the DataFrame
# df['weight'] = df.apply(lambda row: interactions[(row['initiatorid'], row['tltype'])] + interactions[(row['tltype'], row['receiverid'])], axis=1)

# # Now 'df' is a DataFrame that contains your preprocessed data.
# initiatorid = df['initiatorid']
# tltype = df['tltype']
# receiverid = df['receiverid']
# weight = df['weight']

# # Create a new weighted graph.
# G_weighted = nx.Graph()

# # For each row in the DataFrame, add edges to the graph.
# # The nodes are the 'initiatorid', 'tltype', and 'receiverid' columns of the row.
# # The weight of the edge is the 'weight' column of the row.
# for index, row in df.iterrows():
#     G_weighted.add_edge(row['initiatorid'], row['tltype'], weight=row['weight'])
#     G_weighted.add_edge(row['tltype'], row['receiverid'], weight=row['weight'])

# Visualize the Network

In [452]:
#* Plot the graph using the plot_Gp function.
#plot_Gp(G_weighted, measures=nx.degree_centrality(G_weighted))

In [453]:
#* Plot the graph using the plot_Gp function.
#plot_Gp(G_weighted, measures=nx.betweenness_centrality(G_weighted))

In [454]:
#* Plot the graph using the plot_Gp function.
#plot_Gp(G_weighted, measures=nx.eigenvector_centrality(G_weighted))

In [455]:
#* Generate a graphml version of this graph: 


# Basic Topological Attributes

In [456]:
#* Print the graph.
# print(G)

# Centrality Measures 

## Degree Centrality 

In [457]:
#* Calculate the degree centrality of the graph.
# degree = nx.degree_centrality(G_weighted)

In [458]:
#* Draw the graph with node sizes proportional to their degree centrality.
# draw(G_weighted, nx.degree_centrality(G_weighted), 'Degree Centrality')

In [459]:
#* Sort the nodes by their degree centrality and print the sorted list.
# x = degree
# sorted_x = sorted(x.items(), key=lambda kv: kv[1], reverse=True)
# for item in sorted_x:
#     print(item)

In [460]:
#* Write the eigenvector centrality to a file and render it.
# writeandrender(
#     filename, {
#         "measure": degree,
#         "name": "Degree Centrality",
#         "prefix": "degree",
#         "overwrite": True
# })

## Eigenvector Centrality 

In [461]:
#* Calculate the eigenvector centrality of the graph.
# eigenvector = nx.eigenvector_centrality(G_weighted, max_iter=1000)

In [462]:
#* Draw the graph with node sizes proportional to their eigenvector centrality.
# draw(G_weighted, eigenvector, 'Eigenvector Centrality')

In [463]:
#* Sort the nodes by their betweenness centrality and print the sorted list.
# x = eigenvector
# sorted_x = sorted(x.items(), key=lambda kv: kv[1], reverse=True)
# for item in sorted_x:
#     print(item)

In [464]:
#* Write the eigenvector centrality to a file.
# with open(filename + "eigen.txt", 'a') as f:
#     f.write(str(eigenvector))

In [465]:
#* Write the eigenvector centrality to a file and render it.
# writeandrender(filename,
#     {
#         "measure":eigenvector,
#         "name": "Eigenvector Centrality",
#         "prefix":"eigen",
#         "overwrite": True
#     }
# )

## Betweenness Centrality 

In [466]:
#* Calculate the betweenness centrality of the graph.
# betweenness = nx.betweenness_centrality(G_weighted)

In [467]:
#* Draw the graph with node sizes proportional to their betweenness centrality.
# draw(G_weighted, betweenness, 'Betweenness Centrality')

In [468]:
#* Sort the nodes by their betweenness centrality and print the sorted list.
# x = nx.betweenness_centrality(G_weighted)
# sorted_x = sorted(x.items(), key=lambda kv: kv[1], reverse=True)
# for item in sorted_x:
#     print(item)

In [469]:
#* Write the betweenness centrality to a file and render it.
# writeandrender(
#   filename, {
#       "measure": betweenness,
#       "name": 'Betweenness Centrality',
#       "prefix": "bet",
#       "overwrite": True
#   }
# )