# 0 Importing Packages

In [1]:
# Load the autoreload extension to automatically reload modules before executing code (to avoid restarting the kernel)
%load_ext autoreload 
# NB. uncomment the line above first time you run this cell
%autoreload 2

import pandas as pd

from resources.network_functions import GraphConstructor, calculate_weighted_density
from resources.appendix_table_functions import (summarize_company_interactions, 
                                                filter_data_rows, 
                                                compute_adjacency_matrix_inter, 
                                                compute_adjacency_matrix_intra)


## 0.1 File Paths

In [2]:
# File paths
import resources.filepaths as fp

fp_main = fp.fp_main
fp_main_output = fp.fp_main_output

# To output data that has to go to external s-drive
fp_main_external = fp.fp_main_external
fp_output_external = fp.fp_output_external

# For appendix output files
fp_appendix_output = fp.fp_appendix_output

# 1 Load in edgelist data

In [3]:
# Read in the data
all_edges_user_level = pd.read_parquet(fp_output_external / 'all_edges_user_level.gzip.parquet')
attention_edges_user_level = pd.read_parquet(fp_output_external / 'attention_edges_user_level.gzip.parquet')
collaboration_edges_user_level = pd.read_parquet(fp_output_external / 'collaboration_edges_user_level.gzip.parquet')

In [4]:
## 1.1 Construct the graphs
gc_attention = GraphConstructor(all_edges_user_level, graph_type='attention')
attention_graph = gc_attention.get_graph()

gc_collaboration = GraphConstructor(all_edges_user_level, graph_type='collaboration')
collaboration_graph = gc_collaboration.get_graph()

## 2 Calculate basic descriptive for users and companies

In [5]:
# Get unique companies and their company types
unique_src_companies = all_edges_user_level[['src_company', 'src_company_category']].rename(columns={'src_company': 'company', 'src_company_category': 'category'}).drop_duplicates()
unique_target_companies = all_edges_user_level[['target_company', 'target_company_category']].rename(columns={'target_company': 'company', 'target_company_category': 'category'}).drop_duplicates()

# Get the unique set of companies and their categories
unique_companies = pd.concat([unique_src_companies, unique_target_companies]).drop_duplicates().reset_index(drop=True)

# Companies in total
total_companies = unique_companies['company'].nunique()
print('Total number of unique companies:', total_companies)

# Number of each type of company
category_counts = unique_companies['category'].value_counts()
print('Number of each type of company:')
category_counts

Total number of unique companies: 15
Number of each type of company:


category
1    15
Name: count, dtype: int64

In [6]:
# Get unique users and their types
unique_src = all_edges_user_level[['src', 'src_usertype']].rename(columns={'src_user_type': 'usertype', 'src': 'user'}).drop_duplicates()
unique_target = all_edges_user_level[['target', 'target_usertype']].rename(columns={'target_usertype': 'usertype', 'target': 'user'}).drop_duplicates()

# Get unique set of src and target
unique_set = pd.concat([unique_src, unique_target]).drop_duplicates().reset_index(drop=True)

# Users in total
print(f'Number of unique users in total: {len(unique_set)}')

# Types of users
unique_set.usertype.value_counts().sort_index()

Number of unique users in total: 192


usertype
Organization     9
User            74
Name: count, dtype: int64

# 3 Collaboration Network Stats

## 3.1 General network statistics

In [7]:
# User level
no_users = len(pd.unique(collaboration_edges_user_level[['src', 'target']].values.ravel()))
no_unique_inter_user_to_user = collaboration_edges_user_level[collaboration_edges_user_level['d_inter_level'] == 1][['src', 'target']].drop_duplicates().shape[0]
no_unique_intra_user_to_user = collaboration_edges_user_level[collaboration_edges_user_level['d_intra_level'] == 1][['src', 'target']].drop_duplicates().shape[0]

# Company level
no_companies = len(set(collaboration_edges_user_level["src_company"]).union(collaboration_edges_user_level["target_company"]))
no_inter_company_edges_directed = len([(u,v) for u, v, d in collaboration_graph.edges(data=True) if d.get("d_inter_level") == 1])

# Total weight of inter-company edges (user-level, directed)
no_inter_gh = collaboration_edges_user_level[collaboration_edges_user_level['d_inter_level'] == 1].shape[0]

# Total weight of self-loop edges (src_company == tgt_company)z
no_intra_gh = collaboration_edges_user_level[collaboration_edges_user_level['d_intra_level'] == 1].shape[0]

# Calculate the weighted density of the collaboration graph
weighted_density_collaboration = calculate_weighted_density(collaboration_graph)

print(f"No. of users: {no_users}")
print(f"No. of companies: {no_companies}")
print(f"Inter-company GH actions: {no_inter_gh}")
print(f"Intra-company GH actions: {no_intra_gh}")
print(f"Unique inter-company edges (directed): {no_inter_company_edges_directed}")
print(f"Unique directed user-to-user edges (inter): {no_unique_inter_user_to_user}")
print(f"Unique directed user-to-user edges (intra): {no_unique_intra_user_to_user}")
print(f"Weighted density: {weighted_density_collaboration}")

No. of users: 20
No. of companies: 8
Inter-company GH actions: 0
Intra-company GH actions: 28
Unique inter-company edges (directed): 0
Unique directed user-to-user edges (inter): 0
Unique directed user-to-user edges (intra): 18
Weighted density: 0.0


## 3.2 Generate summary table of edges

In [8]:
# Get the latex table summary for collaboration actions
collaboration_latex_summary, summary = summarize_company_interactions(
    collaboration_edges_user_level, network_type="collaboration"
)

# Save as LaTeX file
with open(fp_appendix_output / 'collaboration_table_4_action_summary_data_rows.tex', 'w') as f:
    f.write(collaboration_latex_summary)

print(summary)

  \textbf{Company}  \textbf{Company Category}  \textbf{Total Inter Actions}  \
0           abtion                          0                             0   
1           eg a s                          0                             0   
2             must                          0                             0   
3       netcompany                          0                             0   
4            shape                          0                             0   
5         signifly                          0                             0   
6          trifork                          0                             0   
7           uptime                          0                             0   

   \textbf{Inter Inbound Forks}  \textbf{Inter Outbound Forks}  \
0                             0                              0   
1                             0                              0   
2                             0                              0   
3                       

## 3.3 Adjadency matrix (only inter-level connections)

In [9]:
# Compute the adjacency matrix for inter-company edges
matrix, latex_matrix = compute_adjacency_matrix_inter(
    collaboration_edges_user_level
)

# Filter data rows only
collaboration_adjacency_inter = filter_data_rows(latex_matrix, adjacency_table=True)
print(collaboration_adjacency_inter)

# Save the latex table to a file
with open(fp_appendix_output / "collaboration_table_1_adjacency_inter_data_rows.tex", "w") as f:
    f.write(collaboration_adjacency_inter)




## 3.4 Adjadency matrix (all connections, including intra)

In [10]:
# Compute the adjacency matrix for intra-company edges
matrix, latex_matrix = compute_adjacency_matrix_intra(collaboration_edges_user_level)

# Filter data rows only
collaboration_adjacency_intra = filter_data_rows(latex_matrix, adjacency_table=True)
print(collaboration_adjacency_intra)

# Save the latex table to a file
with open(fp_appendix_output / "collaboration_table_3_adjacency_intra_data_rows.tex", "w") as f:
    f.write(collaboration_adjacency_intra)

1 Digital and marketing consultancies & 28 \\


# 4 Attention Network Stats

## 4.1 General network statistics

In [11]:
# User level
no_users = len(pd.unique(attention_edges_user_level[['src', 'target']].values.ravel()))
no_unique_inter_user_to_user = attention_edges_user_level[attention_edges_user_level['d_inter_level'] == 1][['src', 'target']].drop_duplicates().shape[0]
no_unique_intra_user_to_user = attention_edges_user_level[attention_edges_user_level['d_intra_level'] == 1][['src', 'target']].drop_duplicates().shape[0]

# Company level
no_companies = len(set(attention_edges_user_level["src_company"]).union(attention_edges_user_level["target_company"]))
no_inter_company_edges_directed = len([(u,v) for u,v, d in attention_graph.edges(data=True) if d.get("d_inter_level") == 1])

# Total weight of inter-company edges (user-level, directed)
no_inter_gh = attention_edges_user_level[attention_edges_user_level['d_inter_level'] == 1].shape[0]

# Total weight of self-loop edges (src_company == tgt_company)z
no_intra_gh = attention_edges_user_level[attention_edges_user_level['d_intra_level'] == 1].shape[0]

# Calculate the weighted density of the attention graph
weighted_density_attention = calculate_weighted_density(attention_graph)

print(f"No. of users: {no_users}")
print(f"No. of companies: {no_companies}")
print(f"Inter-company GH actions: {no_inter_gh}")
print(f"Intra-company GH actions: {no_intra_gh}")
print(f"Unique inter-company edges (directed): {no_inter_company_edges_directed}")
print(f"Unique directed user-to-user edges (inter): {no_unique_inter_user_to_user}")
print(f"Unique directed user-to-user edges (intra): {no_unique_intra_user_to_user}")
print(f"Weighted density: {weighted_density_attention}")

No. of users: 114
No. of companies: 15
Inter-company GH actions: 3
Intra-company GH actions: 1197
Unique inter-company edges (directed): 3
Unique directed user-to-user edges (inter): 3
Unique directed user-to-user edges (intra): 135
Weighted density: 0.014285714285714285


## 4.2 Generate summary table of edges

In [12]:
# Get the latex table summary for attention actions
attention_latex_summary, summary = summarize_company_interactions(
    attention_edges_user_level, network_type="attention"
)

# save as LaTeX file
latex_file_path = fp_appendix_output / 'attention_table_6_action_summary_data_rows.tex'
with open(latex_file_path, 'w') as f:
    f.write(attention_latex_summary)

print(summary)

   \textbf{Company}  \textbf{Company Category}  \textbf{Total Inter Actions}  \
0            abtion                          0                             2   
4         capgemini                          0                             0   
3     charlie tango                          0                             1   
5            eg a s                          0                             0   
6               ffw                          0                             0   
7            holion                          0                             0   
8             kruso                          0                             0   
9              must                          0                             0   
10       netcompany                          0                             0   
1            oxygen                          0                             2   
11            shape                          0                             0   
2          signifly                     

## 4.3 Adjadency matrix (only inter-level conenctions)

In [13]:
# Compute the adjacency matrix for inter-company edges
matrix, latex_matrix = compute_adjacency_matrix_inter(
    attention_edges_user_level
)

# Filter data rows only
attention_adjacency_inter = filter_data_rows(latex_matrix, adjacency_table=True)
print(attention_adjacency_inter)

# Save the latex table to a file
with open(fp_appendix_output / "attention_table_5_adjacency_inter_data_rows.tex", "w") as f:
    f.write(attention_adjacency_inter)

1 Digital and marketing consultancies & 3 \\


## 4.4 Adjadency matrix (all connections, including intra-level)

In [14]:
# Compute the adjacency matrix for intra-company edges
matrix, latex_matrix = compute_adjacency_matrix_intra(attention_edges_user_level)

# Filter data rows only
attention_adjacency_intra = filter_data_rows(latex_matrix, adjacency_table=True)
print(attention_adjacency_intra)

# Save the latex table to a file
with open(fp_appendix_output / "attention_2_adjacency_intra_data_rows.tex", "w") as f:
    f.write(attention_adjacency_intra)

1 Digital and marketing consultancies & 1200 \\
