# 0 Importing Packages

In [1]:
# Load the autoreload extension to automatically reload modules before executing code (to avoid restarting the kernel)
%load_ext autoreload 
# NB. the extension only needs to be loaded once per kernel session
%autoreload 2

from pathlib import Path
import pandas as pd

from resources.network_functions import GraphConstructor, calculate_weighted_density
from resources.appendix_table_functions import (summarize_company_interactions, 
                                                filter_data_rows, 
                                                compute_adjacency_matrix_inter, 
                                                compute_adjacency_matrix_intra)


## 0.1 File Paths

In [None]:
# File paths are defined centrally in the resources.filepaths module
import resources.filepaths as fp

# Main path and main output path
fp_main, fp_main_output = fp.fp_main, fp.fp_main_output

# Paths for data that has to be written to the external s-drive
fp_main_external, fp_output_external = fp.fp_main_external, fp.fp_output_external

# Path for the appendix output files
fp_appendix_output = fp.fp_appendix_output

# 1 Load in edgelist data

In [22]:
# Load the three user-level edge lists stored as parquet files under fp_main_output
all_edges_user_level = pd.read_parquet(
    fp_main_output / 'all_edges_user_level.gzip.parquet'
)
attention_edges_user_level = pd.read_parquet(
    fp_main_output / 'attention_edges_user_level.gzip.parquet'
)
collaboration_edges_user_level = pd.read_parquet(
    fp_main_output / 'collaboration_edges_user_level.gzip.parquet'
)

In [23]:
## 1.1 Construct the graphs
# Both constructors receive the full edge list; `graph_type` selects the network.
# NOTE(review): presumably GraphConstructor filters the edges by graph_type
# internally -- confirm in resources.network_functions.
gc_attention = GraphConstructor(
    all_edges_user_level,
    graph_type='attention',
)
attention_graph = gc_attention.get_graph()

gc_collaboration = GraphConstructor(
    all_edges_user_level,
    graph_type='collaboration',
)
collaboration_graph = gc_collaboration.get_graph()

# 2 Calculate basic descriptive statistics for users and companies

In [None]:
# Distinct (company, category) pairs seen on the source side of an edge
unique_src_companies = (
    all_edges_user_level[['src_company', 'src_company_category']]
    .rename(columns={'src_company': 'company', 'src_company_category': 'category'})
    .drop_duplicates()
)

# Distinct (company, category) pairs seen on the target side of an edge
unique_target_companies = (
    all_edges_user_level[['target_company', 'target_company_category']]
    .rename(columns={'target_company': 'company', 'target_company_category': 'category'})
    .drop_duplicates()
)

# Stack both sides and de-duplicate again to get one table of companies and categories
unique_companies = (
    pd.concat([unique_src_companies, unique_target_companies])
    .drop_duplicates()
    .reset_index(drop=True)
)

# Total number of distinct companies
total_companies = unique_companies['company'].nunique()
print('Total number of unique companies:', total_companies)

# Number of companies per category
category_counts = unique_companies['category'].value_counts()
print('Number of each type of company:')
print(category_counts)

Total number of unique companies: 46
Number of each type of company:
category
1.0    24
4.0    12
2.0     6
3.0     4
Name: count, dtype: int64


In [None]:
# Distinct (user, usertype) pairs seen on the source side of an edge
unique_src = (
    all_edges_user_level[['src', 'usertype_src']]
    .rename(columns={'usertype_src': 'usertype', 'src': 'user'})
    .drop_duplicates()
)

# Distinct (user, usertype) pairs seen on the target side of an edge
unique_target = (
    all_edges_user_level[['target', 'usertype_target']]
    .rename(columns={'usertype_target': 'usertype', 'target': 'user'})
    .drop_duplicates()
)

# Stack both sides and de-duplicate again to get one table of users and their types
unique_set = (
    pd.concat([unique_src, unique_target])
    .drop_duplicates()
    .reset_index(drop=True)
)

# Users in total: count distinct user identifiers rather than table rows, so a user
# that appears with more than one usertype is not counted twice (this mirrors how
# companies are counted with .nunique() in the company cell).
total_users = unique_set['user'].nunique()
print(f'Number of unique users in total: {total_users}')

# Types of users (last expression, shown as the cell output)
unique_set.usertype.value_counts().sort_index()

Number of unique users in total: 372


usertype
Organization     51
User            321
Name: count, dtype: int64

# 3 Collaboration Network Stats

## 3.1 General network statistics

In [24]:
# Row selections reused below: edge-list rows flagged as inter- / intra-level
collab_is_inter = collaboration_edges_user_level['d_inter_level'] == 1
collab_is_intra = collaboration_edges_user_level['d_intra_level'] == 1
collab_inter_rows = collaboration_edges_user_level[collab_is_inter]
collab_intra_rows = collaboration_edges_user_level[collab_is_intra]

# --- User level ---
# Distinct users appearing as either endpoint of an edge
collab_endpoints = collaboration_edges_user_level[['src', 'target']].to_numpy().ravel()
no_users = len(pd.unique(collab_endpoints))
# Distinct directed (src, target) user pairs among inter- and intra-level rows
no_unique_inter_user_to_user = len(collab_inter_rows[['src', 'target']].drop_duplicates())
no_unique_intra_user_to_user = len(collab_intra_rows[['src', 'target']].drop_duplicates())

# --- Company level ---
# Distinct companies appearing on either side of an edge
collab_companies = (
    set(collaboration_edges_user_level["src_company"])
    | set(collaboration_edges_user_level["target_company"])
)
no_companies = len(collab_companies)
# Graph edges whose attribute dict carries d_inter_level == 1
no_inter_company_edges_directed = sum(
    1
    for _src, _dst, attrs in collaboration_graph.edges(data=True)
    if attrs.get("d_inter_level") == 1
)

# Number of user-level edge-list rows flagged as inter-level
no_inter_gh = len(collab_inter_rows)

# Number of user-level edge-list rows flagged as intra-level
no_intra_gh = len(collab_intra_rows)

# Weighted density of the collaboration graph
weighted_density_collaboration = calculate_weighted_density(collaboration_graph)

print(f"No. of users: {no_users}")
print(f"No. of companies: {no_companies}")
print(f"Inter-company GH actions: {no_inter_gh}")
print(f"Intra-company GH actions: {no_intra_gh}")
print(f"Unique inter-company edges (directed): {no_inter_company_edges_directed}")
print(f"Unique directed user-to-user edges (inter): {no_unique_inter_user_to_user}")
print(f"Unique directed user-to-user edges (intra): {no_unique_intra_user_to_user}")
print(f"Weighted density: {weighted_density_collaboration}")

No. of users: 70
No. of companies: 20
Inter-company GH actions: 8
Intra-company GH actions: 98
Unique inter-company edges (directed): 6
Unique directed user-to-user edges (inter): 6
Unique directed user-to-user edges (intra): 59
Weighted density: 0.015789473684210527


## 3.2 Generate summary table of edges

In [None]:
# Build the LaTeX table text and the printable summary for collaboration actions
collaboration_latex_summary, summary = summarize_company_interactions(
    collaboration_edges_user_level, network_type="collaboration"
)

# Save as LaTeX file. The encoding is given explicitly: without it open() uses the
# locale's preferred encoding, so the written bytes could differ between machines.
with open(
    fp_appendix_output / 'collaboration_table_4_action_summary_data_rows.tex',
    'w',
    encoding='utf-8',
) as f:
    f.write(collaboration_latex_summary)

print(summary)

NameError: name 'summarize_company_interactions' is not defined

## 3.3 Adjacency matrix (only inter-level connections)

In [None]:
# Compute the adjacency matrix for inter-company edges (matrix plus its LaTeX rendering)
matrix, latex_matrix = compute_adjacency_matrix_inter(
    collaboration_edges_user_level
)

# Keep only the data rows of the LaTeX table
collaboration_adjacency_inter = filter_data_rows(latex_matrix, adjacency_table=True)
print(collaboration_adjacency_inter)

# Save the LaTeX rows to a file. The encoding is given explicitly: without it open()
# uses the locale's preferred encoding, so the output could differ between machines.
with open(
    fp_appendix_output / "collaboration_table_1_adjacency_inter_data_rows.tex",
    "w",
    encoding="utf-8",
) as f:
    f.write(collaboration_adjacency_inter)

1 Digital and marketing consultancies & 3 & 0 & 0 & 2 \\
2 Bespoke app companies & 2 & 0 & 0 & 0 \\
3 Data-broker- and infrastructure companies & 1 & 0 & 0 & 0 \\
4 Companies with specific digital part/app as part of service/product & 0 & 0 & 0 & 0 \\


## 3.4 Adjacency matrix (all connections, including intra-level)

In [None]:
# Compute the adjacency matrix that also includes intra-level connections
# NOTE(review): "all connections" follows the section heading -- confirm against
# compute_adjacency_matrix_intra in resources.appendix_table_functions.
matrix, latex_matrix = compute_adjacency_matrix_intra(collaboration_edges_user_level)

# Keep only the data rows of the LaTeX table
collaboration_adjacency_intra = filter_data_rows(latex_matrix, adjacency_table=True)
print(collaboration_adjacency_intra)

# Save the LaTeX rows to a file. The encoding is given explicitly: without it open()
# uses the locale's preferred encoding, so the output could differ between machines.
with open(
    fp_appendix_output / "collaboration_table_3_adjacency_intra_data_rows.tex",
    "w",
    encoding="utf-8",
) as f:
    f.write(collaboration_adjacency_intra)

1 Digital and marketing consultancies & 87 & 0 & 0 & 2 \\
2 Bespoke app companies & 2 & 2 & 0 & 0 \\
3 Data-broker- and infrastructure companies & 1 & 0 & 2 & 0 \\
4 Companies with specific digital part/app as part of service/product & 0 & 0 & 0 & 10 \\


# 4 Attention Network Stats

## 4.1 General network statistics

In [28]:
# Row selections reused below: edge-list rows flagged as inter- / intra-level
attn_is_inter = attention_edges_user_level['d_inter_level'] == 1
attn_is_intra = attention_edges_user_level['d_intra_level'] == 1
attn_inter_rows = attention_edges_user_level[attn_is_inter]
attn_intra_rows = attention_edges_user_level[attn_is_intra]

# --- User level ---
# Distinct users appearing as either endpoint of an edge
attn_endpoints = attention_edges_user_level[['src', 'target']].to_numpy().ravel()
no_users = len(pd.unique(attn_endpoints))
# Distinct directed (src, target) user pairs among inter- and intra-level rows
no_unique_inter_user_to_user = len(attn_inter_rows[['src', 'target']].drop_duplicates())
no_unique_intra_user_to_user = len(attn_intra_rows[['src', 'target']].drop_duplicates())

# --- Company level ---
# Distinct companies appearing on either side of an edge
attn_companies = (
    set(attention_edges_user_level["src_company"])
    | set(attention_edges_user_level["target_company"])
)
no_companies = len(attn_companies)
# Graph edges whose attribute dict carries d_inter_level == 1
no_inter_company_edges_directed = sum(
    1
    for _src, _dst, attrs in attention_graph.edges(data=True)
    if attrs.get("d_inter_level") == 1
)

# Number of user-level edge-list rows flagged as inter-level
no_inter_gh = len(attn_inter_rows)

# Number of user-level edge-list rows flagged as intra-level
no_intra_gh = len(attn_intra_rows)

# Weighted density of the attention graph
weighted_density_attention = calculate_weighted_density(attention_graph)

print(f"No. of users: {no_users}")
print(f"No. of companies: {no_companies}")
print(f"Inter-company GH actions: {no_inter_gh}")
print(f"Intra-company GH actions: {no_intra_gh}")
print(f"Unique inter-company edges (directed): {no_inter_company_edges_directed}")
print(f"Unique directed user-to-user edges (inter): {no_unique_inter_user_to_user}")
print(f"Unique directed user-to-user edges (intra): {no_unique_intra_user_to_user}")
print(f"Weighted density: {weighted_density_attention}")

No. of users: 357
No. of companies: 41
Inter-company GH actions: 306
Intra-company GH actions: 4321
Unique inter-company edges (directed): 53
Unique directed user-to-user edges (inter): 94
Unique directed user-to-user edges (intra): 500
Weighted density: 0.05731707317073171


## 4.2 Generate summary table of edges

In [None]:
# Build the LaTeX table text and the printable summary for attention actions
attention_latex_summary, summary = summarize_company_interactions(
    attention_edges_user_level, network_type="attention"
)

# Save as LaTeX file. The encoding is given explicitly: without it open() uses the
# locale's preferred encoding, so the written bytes could differ between machines.
latex_file_path = fp_appendix_output / 'attention_table_6_action_summary_data_rows.tex'
with open(latex_file_path, 'w', encoding='utf-8') as f:
    f.write(attention_latex_summary)

print(summary)

abtion & 0 & 2 & 1 & 1 & 0 & 1 & 0 & 0 & 1 & 0 & 110 & 9 & 99 & 2 \\
capgemini & 0 & 4 & 0 & 4 & 0 & 0 & 0 & 0 & 4 & 0 & 14 & 0 & 14 & 0 \\
cbrain & 0 & 24 & 4 & 20 & 0 & 0 & 4 & 4 & 10 & 6 & 10 & 1 & 9 & 0 \\
charlie tango & 0 & 2 & 0 & 2 & 0 & 0 & 0 & 1 & 0 & 1 & 155 & 16 & 127 & 12 \\
codefort & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 39 & 1 & 38 & 0 \\
commentor & 0 & 2 & 0 & 2 & 0 & 0 & 0 & 0 & 0 & 2 & 0 & 0 & 0 & 0 \\
creuna & 0 & 3 & 3 & 0 & 3 & 0 & 0 & 0 & 0 & 0 & 6 & 0 & 6 & 0 \\
delegateas & 0 & 10 & 8 & 2 & 4 & 0 & 4 & 0 & 0 & 2 & 88 & 7 & 79 & 2 \\
deondigital & 0 & 17 & 13 & 4 & 6 & 3 & 4 & 2 & 2 & 0 & 21 & 6 & 15 & 0 \\
eg a s & 0 & 2 & 0 & 2 & 0 & 0 & 0 & 0 & 2 & 0 & 105 & 1 & 36 & 68 \\
fellowmind & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 5 & 2 & 3 & 0 \\
ffw & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 5 & 0 & 5 & 0 \\
holion & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 11 & 0 & 11 & 0 \\
house of code & 0 & 59 & 1 & 58 & 0 & 1 & 0 & 0 & 56 & 2 & 2 & 0 & 2 & 0 \\
immeo & 0 &

## 4.3 Adjacency matrix (only inter-level connections)

In [None]:
# Compute the adjacency matrix for inter-company edges (matrix plus its LaTeX rendering)
matrix, latex_matrix = compute_adjacency_matrix_inter(
    attention_edges_user_level
)

# Keep only the data rows of the LaTeX table
attention_adjacency_inter = filter_data_rows(latex_matrix, adjacency_table=True)
print(attention_adjacency_inter)

# Save the LaTeX rows to a file. The encoding is given explicitly: without it open()
# uses the locale's preferred encoding, so the output could differ between machines.
with open(
    fp_appendix_output / "attention_table_5_adjacency_inter_data_rows.tex",
    "w",
    encoding="utf-8",
) as f:
    f.write(attention_adjacency_inter)

1 Digital and marketing consultancies & 58 & 24 & 6 & 74 \\
2 Bespoke app companies & 88 & 0 & 0 & 8 \\
3 Data-broker- and infrastructure companies & 3 & 0 & 0 & 4 \\
4 Companies with specific digital part/app as part of service/product & 15 & 1 & 13 & 12 \\


## 4.4 Adjacency matrix (all connections, including intra-level)

In [None]:
# Compute the adjacency matrix that also includes intra-level connections
# NOTE(review): "all connections" follows the section heading -- confirm against
# compute_adjacency_matrix_intra in resources.appendix_table_functions.
matrix, latex_matrix = compute_adjacency_matrix_intra(attention_edges_user_level)

# Keep only the data rows of the LaTeX table
attention_adjacency_intra = filter_data_rows(latex_matrix, adjacency_table=True)
print(attention_adjacency_intra)

# Save the LaTeX rows to a file. The encoding is given explicitly: without it open()
# uses the locale's preferred encoding, so the output could differ between machines.
# NOTE(review): this file name lacks the "table_" infix used by the other appendix
# outputs ("attention_table_5_...", "collaboration_table_3_..."). It is left unchanged
# because external LaTeX sources may reference it -- confirm before renaming.
with open(
    fp_appendix_output / "attention_2_adjacency_intra_data_rows.tex",
    "w",
    encoding="utf-8",
) as f:
    f.write(attention_adjacency_intra)

1 Digital and marketing consultancies & 3299 & 24 & 6 & 74 \\
2 Bespoke app companies & 88 & 208 & 0 & 8 \\
3 Data-broker- and infrastructure companies & 3 & 0 & 237 & 4 \\
4 Companies with specific digital part/app as part of service/product & 15 & 1 & 13 & 647 \\
