In [None]:
import ast
import base64
import os
import time
from collections import defaultdict
from itertools import combinations

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import netwulf as nw
import networkx as nx
import numpy as np
import pandas as pd
import requests
from joblib import Parallel, delayed
from tqdm import tqdm

# Get Data

### Get top repositories

In [None]:
# Read the GitHub token from the GITHUB_TOKEN environment variable so the
# secret never has to be typed into (and saved with) the notebook. The
# original placeholder stays as the fallback when the variable is not set.
token = os.environ.get('GITHUB_TOKEN', 'Insert GitHub Token Here')

# Headers sent with the GitHub REST API requests made in this notebook.
headers = {
    'Accept': 'application/vnd.github.v3+json',
    'Authorization': f'token {token}',
}
# Module-level list of repositories; rebound once the search results are flattened.
repositories = []

def page_request(page, params, headers):
    """Fetch one page of results from the GitHub repository search API.

    Args:
        page: Page number to request.
        params: Query parameters of the search; the dict is not modified.
        headers: HTTP headers (including authorization) sent with the request.

    Returns:
        list: The repository objects found under ``'items'`` in this page's
        JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (via ``raise_for_status``).
    """
    base_url = 'https://api.github.com/search/repositories'

    # Work on a copy so the dict shared by all page requests is left untouched.
    page_params = {**params, 'page': page}

    response = requests.get(base_url, params=page_params, headers=headers)
    response.raise_for_status()

    # Return only this page's items. Extending and returning a module-level
    # list made every call hand back everything accumulated so far in its
    # process, so the flattened results contained the same pages repeatedly.
    return response.json()['items']

In [None]:
# Search query: repositories tagged "machine-learning" with more than one star,
# most-starred first, 100 results per page.
params = dict(
    q='stars:>1 topic:machine-learning',
    sort='stars',
    order='desc',
    per_page=100,
)

# Request result pages 1 through 10 on four workers; `output` holds one
# page_request result per requested page.
output = Parallel(n_jobs=4)(
    delayed(page_request)(page_number, params, headers)
    for page_number in tqdm(range(1, 11))
)

In [None]:
# Second search pass. The de-duplication cell reads these results from
# `output2`; they were previously stored in `output`, leaving `output2`
# undefined on a fresh kernel.
# NOTE(review): this query is identical to the first search, so it cannot add
# new repositories — presumably a different query was intended; confirm.
params = {
    'q': 'stars:>1 topic:machine-learning',  # Search for repositories with more than 1 star and the topic "machine-learning"
    'sort': 'stars',  # Sort by the number of stars
    'order': 'desc',  # Order in descending order (most stars first)
    'per_page': 100,  # Get 100 results per page
}

output2 = Parallel(n_jobs=4)(delayed(page_request)(page, params, headers) for page in tqdm(range(1, 11)))

### Isolate unique repositories

In [None]:
# Flatten the per-page results of both searches into flat repository lists.
repositories = [repo for page_items in output for repo in page_items]
len(repositories)

repositories2 = [repo for page_items in output2 for repo in page_items]
len(repositories2)

combine = repositories + repositories2
print(len(combine))

description = []

# Keep the first occurrence of every repository id, in the order encountered.
first_repo_by_id = {}
for repo in combine:
    first_repo_by_id.setdefault(repo['id'], repo)

unique_repositories = list(first_repo_by_id.values())
seen_ids = set(first_repo_by_id)

print(len(unique_repositories))

### Make df with repository name and full name

In [None]:
# Short name and "owner/name" full name of every unique repository.
name = [repo['name'] for repo in unique_repositories]
full_name = [repo['full_name'] for repo in unique_repositories]

# Use the complete lists: the previous `[:len(name)-1]` slices silently
# dropped the last repository from the saved table.
df_name = pd.DataFrame({'name': name, 'full_name': full_name})

# Save the dataframe
df_name.to_csv('repositories_name.csv', index=False)

### Get the contributors

In [None]:
def _get_with_rate_limit_retry(url, headers):
    """GET ``url``, sleeping and retrying while GitHub answers 403 with a rate-limit reset time."""
    while True:
        response = requests.get(url, headers=headers)
        reset_time = response.headers.get('X-RateLimit-Reset')
        # Anything other than a 403 carrying a reset time is final for the caller.
        if response.status_code != 403 or reset_time is None:
            return response
        sleep_time = max(int(reset_time) - time.time(), 0)  # Seconds until the limit resets
        print(f'Rate limit exceeded. Sleeping for {sleep_time} seconds.')
        time.sleep(sleep_time)


def get_contributors(repo):
    """Collect the contributors of one repository together with their companies.

    Args:
        repo: Repository object from the GitHub search API. The keys ``owner``
            (with ``login``), ``name``, ``topics``, ``description`` and
            ``stargazers_count`` are read.

    Returns:
        tuple: ``(all_contributors, topics, description, stargazers_count, company)``
            * ``all_contributors``: one-element list holding the login list
              ``[owner, contributor_1, ...]``.
            * ``topics``, ``description``, ``stargazers_count``: one-element
              lists with the corresponding repository fields.
            * ``company``: one entry per contributor *after* the owner, i.e.
              ``company[k]`` belongs to login ``k + 1``. The entry is the
              user's ``company`` field, ``'None'`` if the field is absent, or
              ``None`` if the user lookup failed.
    """
    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': f'token {token}',
    }

    topics = [repo['topics']]
    description = [repo['description']]
    stargazers_count = [repo['stargazers_count']]
    company = []
    # The owner always comes first; no company is recorded for this entry.
    c = [repo['owner']['login']]

    url = f"https://api.github.com/repos/{repo['owner']['login']}/{repo['name']}/contributors"

    page = 1
    while True:
        response = _get_with_rate_limit_retry(url + '?page=' + str(page), headers)
        if response.status_code != 200:
            break

        contri = response.json()
        if not contri:  # An empty page marks the end of the listing
            break

        for contributor in contri:
            user_response = _get_with_rate_limit_retry(contributor['url'], headers)
            if user_response.status_code == 200:
                user_company = user_response.json().get('company', 'None')
            else:
                # A failed lookup is recorded as missing instead of being
                # retried forever (previously an endless loop).
                user_company = None

            # Append login and company together, exactly once per contributor,
            # so that retries can no longer duplicate the login.
            c.append(contributor['login'])
            company.append(user_company)

        page += 1

    all_contributors = [c]
    return all_contributors, topics, description, stargazers_count, company

In [None]:
# Gather contributor data for every unique repository on four workers;
# `output` holds one get_contributors result per repository.
output = Parallel(n_jobs=4)(
    delayed(get_contributors)(repository)
    for repository in tqdm(unique_repositories)
)

### Create Dataframe

In [None]:
# Unpack the get_contributors tuples into parallel lists, one entry per repository.
contri = [result[0][0] for result in output]             # login list (owner first)
topics = [result[1][0] for result in output]
description = [result[2][0] for result in output]
stargazers_count = [result[3][0] for result in output]

# The company list is empty when no contributor was recorded for a repository;
# indexing it with [0] unconditionally raised IndexError for such repositories.
company = [result[4][0] if result[4] else None for result in output]

print(len(contri))

### Get the owners of the repositories

In [None]:
# Build two aligned lists: every login and the company that belongs to it.
company = []
contributor = []
url = "https://api.github.com/users/"

for result in tqdm(output):
    logins = result[0][0]              # [owner, contributor_1, contributor_2, ...]
    contributor_companies = result[4]  # [company of contributor_1, company of contributor_2, ...]

    # The owner has no stored company, so it is looked up here.
    owner_company = None
    while True:
        user_response = requests.get(url + logins[0], headers=headers)
        if user_response.status_code == 200:
            owner_company = user_response.json().get('company')
            break
        reset_time = user_response.headers.get('X-RateLimit-Reset')
        if user_response.status_code == 403 and reset_time is not None:  # Rate limit exceeded
            sleep_time = max(int(reset_time) - time.time(), 0)  # Seconds until the limit resets
            time.sleep(sleep_time)
            continue
        # Any other status (e.g. a deleted account) previously spun forever
        # without sleeping; give up on this owner and leave the company missing.
        break
    contributor.append(logins[0])
    company.append(owner_company)

    # Stored companies start at the first contributor *after* the owner, so
    # they pair with logins[1:]. Pairing them with logins[j] gave every
    # contributor the company of the next one and dropped the last contributor.
    for login, login_company in zip(logins[1:], contributor_companies):
        contributor.append(login)
        company.append("No Org" if login_company is None else login_company)
        

### Check that company and contributor have the same length

In [None]:
# Print the length of each list; the two numbers should match because the
# lists are paired column-wise in a DataFrame afterwards.
for column_values in (company, contributor):
    print(len(column_values))

### Create Dataframe

In [None]:
# One row per repository: contributor logins, topics, description and star count.
df = pd.DataFrame()
df['contributors'] = contri
df['topics'] = topics
df['description'] = description
df['stargazers_count'] = stargazers_count

df.to_csv('repositories.csv', index=False)

# One row per collected login with its company.
contri_df = pd.DataFrame({
    'Contributor': contributor,
    'Company': company
})

# Missing companies become "No Org". `fillna` replaces the chained assignment
# `contri_df['Company'][mask] = ...`, which pandas may apply to a temporary
# copy (SettingWithCopyWarning) and which has no effect under Copy-on-Write.
contri_df['Company'] = contri_df['Company'].fillna("No Org")

contri_df.to_csv('contributors.csv', index=False)

df.head()  # Display the first few rows of the DataFrame

### Load Dataframes

In [None]:
# Read the contributor and repository tables back from their CSV files.
df_contributors, df_repositories = (
    pd.read_csv(csv_path)
    for csv_path in ('contributors.csv', 'repositories.csv')
)

### Ensure df list elements are list and that there are no duplicates

In [None]:
# Keep a single row per contributor login.
df_contributors = df_contributors.drop_duplicates(subset='Contributor')

# The CSV stores the list columns as text; parse them back into Python lists.
for list_column in ('contributors', 'topics'):
    df_repositories[list_column] = df_repositories[list_column].apply(ast.literal_eval)

### Merge files

In [None]:
# If company is "No Org", then inset identity key
identity_key_generator = iter(range(1, len(df_contributors) + 1))
df_contributors['Company'] = df_contributors['Company'].apply(lambda x: str(next(identity_key_generator)) if x == "No Org" else x)

# Display the first few rows
display(df_contributors)

# Expand the contributors list
df_repositories_expanded = df_repositories.explode('contributors')

# Display the first few rows
display(df_repositories_expanded.head())

# Merge the repositories and contributors dataframes
df_merged = pd.merge(df_repositories_expanded, df_contributors, left_on='contributors', right_on='Contributor', how='left')

# Drop the 'Contributor' column and rename the 'contributors' column and 'Company' column
df_merged = df_merged.drop(columns=['Contributor'])
df_merged = df_merged.rename(columns={'contributors': 'contributor'})
df_merged = df_merged.rename(columns={'Company': 'company'})

# Display the first few rows
display(df_merged.head())

# Print the shape of all dataframes
print(f"Contributors: {df_contributors.shape}")
print(f"Repositories: {df_repositories.shape}")
print(f"Repositories Expanded: {df_repositories_expanded.shape}")
print(f"Merged: {df_merged.shape}")

### Process company column

In [None]:
# slight processing of company to try and fix most comman issues

df_merged['company'] = df_merged['company'].astype(str)

df_merged['company'] = df_merged['company'].str.replace('Inc','')

df_merged['company'] = df_merged['company'].str.replace('https://github.com/','')

df_merged['company'] = df_merged['company'].str.replace('\'','')

df_merged['company'] = df_merged['company'].str.replace('[^\w\s]','')

df_merged['company'] = df_merged['company'].apply(lambda x: 'google' if 'google' in x.lower() else x.lower())

df_merged['company'] = df_merged['company'].apply(lambda x: 'facebook' if 'facebook' in x.lower() else x.lower())

df_merged['company'] = df_merged['company'].apply(lambda x: x.encode("ascii", "ignore").decode())

df_merged['company'] = df_merged['company'].str.replace(' ','')

df_merged['company'] = df_merged['company'].str.replace('@','')

df_merged['company'] = df_merged['company'].apply(lambda x: 'tensorflow' if 'tensorflow' in x.lower() else x.lower())

df_dict = df_merged.set_index('contributor')['company'].to_dict()

### Create adj list

In [None]:
# Count, for every unordered pair of contributors, how many repositories they share.
adj_list2 = defaultdict(int)
count_of_total_contributors = 0  # Despite the name, this counts contributor pairs
for contributors in df_repositories['contributors']:
    # De-duplicate and sort the logins of one repository:
    #  * the owner is listed first and usually again among the contributors,
    #    which counted owner pairs twice for a single repository and produced
    #    (owner, owner) self-pairs;
    #  * sorting stores a pair under one key regardless of listing order,
    #    instead of splitting its count between (a, b) and (b, a).
    unique_logins = sorted(set(contributors))
    for login_pair in combinations(unique_logins, 2):
        adj_list2[login_pair] += 1
        count_of_total_contributors += 1

# Largest, smallest and average number of shared repositories per pair.
max_value = max(adj_list2.values())
print(f"Max value: {max_value}")
print(min(adj_list2.values()))
print(f"avarage value: {sum(adj_list2.values())/len(adj_list2)}")

### Create graph with attribute and get number of unique companies

In [None]:
# Create a graph
G = nx.Graph()

weighted_edge_list = [(u, v, d) for (u, v), d in adj_list2.items() if d > 1]

G.add_weighted_edges_from(weighted_edge_list)

nx.set_node_attributes(G, df_dict, 'company')

# get uniqie companies in the graph using G
companies = set(nx.get_node_attributes(G, 'company').values())
print(f"Number of companies: {len(companies)}")

### basic network analysis - with bot

In [None]:
# Total number of nodes (authors)
num_nodes = G.number_of_nodes()

# Total number of links (collaborations)
num_edges = G.number_of_edges()

# Network's density
density = nx.density(G)

# Check if the network is fully connected
is_connected = nx.is_connected(G)

# Number of connected components
num_connected_components = nx.number_connected_components(G)

# Number of isolated nodes
num_isolated_nodes = nx.number_of_isolates(G)

# largest connected component
largest_cc = max(nx.connected_components(G), key=len)

In [None]:
# Summary of the structural metrics computed in the previous cell.
print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")
print(f"Density: {density}")
print(f"Is the network fully connected? {'Yes' if is_connected else 'No'}")
print(f"Number of connected components: {num_connected_components}")
print(f"Number of isolated nodes: {num_isolated_nodes}")
print(f"largest connected component: {len(largest_cc)}")

# Weight of every edge, then its extremes and mean.
weights = [attributes['weight'] for _, _, attributes in G.edges(data=True)]
max_weight, min_weight = max(weights), min(weights)
avg_weight = sum(weights) / len(weights)

print(f"Maximum weight: {max_weight}")
print(f"Minimum weight: {min_weight}")
print(f"Average weight: {avg_weight}")

In [None]:
# avarage degree
degree = dict(G.degree())
avg_degree = sum(degree.values()) / len(degree)
print(f"Average degree: {avg_degree}")

In [None]:
# (node, degree) pairs ordered from highest to lowest degree, cut to the first ten.
top_degree_nodes = sorted(degree.items(), key=lambda item: item[1], reverse=True)[:10]
print(f'The top 10 highest degree nodes are: {top_degree_nodes}')

### Analysis containing random networks

In [None]:
# Compute the propability p for the Computational Social Scientists network
num_nodes = len(G.nodes)
num_edges = len(G.edges)
p = num_edges / (num_nodes * (num_nodes - 1) / 2)
print(f'number of nodes: {num_nodes} and number of edges: {num_edges}')
print(f'The edge probability for the Computational Social Scientists network is {p}')

Gr = nx.erdos_renyi_graph(num_nodes, p)

In [None]:
# Degree of every node in the real graph and in the random graph.
degree = {node: node_degree for node, node_degree in G.degree()}
degree_randomr = {node: node_degree for node, node_degree in Gr.degree()}

# Mean degree of both networks.
avarage_degree = np.mean(list(degree.values()))
avarage_degree_randomr = np.mean(list(degree_randomr.values()))

print(f'The average degree of github collabaration network {avarage_degree}')
print(f'The average degree of the Random Network is {avarage_degree_randomr}')

# Node holding the largest degree in each network.
max_degree = max(degree, key=lambda node: degree[node])
max_degree_randomr = max(degree_randomr, key=lambda node: degree_randomr[node])

print(f'The node with the highest degree in the github collabaration network is {max_degree} with a degree of {degree[max_degree]}')
print(f'The node with the highest degree in the Random Network is {max_degree_randomr} with a degree of {degree_randomr[max_degree_randomr]}')

In [None]:
# Compute the degree distribution for the Computational Social Scientists network
degrees_real = [degree for node, degree in G.degree()]

print(f'number of nodes with degree 1: {len([degree for degree in degrees_real if degree == 1])}')
print(f'number of nodes with degree 2: {len([degree for degree in degrees_real if degree == 2])}')

# Compute the degree distribution for the random network
degrees_random = [degree for node, degree in Gr.degree()]

# Plot the degree distributions
plt.figure()
plt.hist(degrees_random, bins=50, density=True ,alpha=0.7, label='Random Network')
plt.hist(degrees_real, bins=3*10**3, density=True, alpha=0.7, label='Real Network')

# Add vertical lines for the average degrees
plt.axvline(np.mean(degrees_random), color='r', linestyle='--', label='Average degree (random network)')
plt.axvline(np.mean(degrees_real), color='b', linestyle='--', label='Average degree (real network)')

# log scale the x-axis
plt.xscale('log')
plt.legend()
plt.title('Degree Distributions')
plt.xlabel('Degree')
plt.ylabel('Frequency')
plt.show()

### Assortativity

In [None]:
# degree and attribute assortativity
degree_correlation = nx.degree_assortativity_coefficient(G)
attribute_correlation = nx.attribute_assortativity_coefficient(G, 'company')

print(f'Degree Assortativity: {degree_correlation}')
print(f'Attribute Assortativity: {attribute_correlation}')

In [None]:
def configuration_model(G: nx.Graph, num_swaps: int, seed=None):
    """
    Rewire a copy of a network with double edge swaps and measure its assortativity.

    Args:
        G: nx.Graph, Network to consider; it is copied and left unmodified
        num_swaps: int, Number of edge swaps to perform
        seed: optional seed forwarded to ``nx.double_edge_swap`` so a run can
            be reproduced; the default ``None`` keeps the unseeded behaviour

    Returns:
        tuple: ``(G_new, degree_assortativity, assorsativity)``
            G_new: nx.Graph, the rewired copy of the network
            degree_assortativity: degree assortativity coefficient of ``G_new``
            assorsativity: assortativity of ``G_new`` with respect to the
                'company' node attribute

    Note:
        At most ``num_swaps * 2`` swap attempts are made; per the networkx
        documentation ``double_edge_swap`` raises if that budget is exhausted
        before ``num_swaps`` swaps succeed.
    """

    # Work on a copy so the caller's graph keeps its edges.
    G_new = G.copy()

    # double_edge_swap rewires the graph it is given and returns it.
    G_new = nx.double_edge_swap(G_new, nswap=num_swaps, max_tries=num_swaps*2, seed=seed)

    degree_assortativity = nx.degree_assortativity_coefficient(G_new)

    assorsativity = nx.attribute_assortativity_coefficient(G_new, 'company')

    return G_new, degree_assortativity, assorsativity

In [None]:

print(f'The degree assortativity coefficient for the github collaboration network is {nx.degree_assortativity_coefficient(G)}')

# 100 rewired networks, each produced with ten swaps per edge of G.
num_networks, num_edges = 100, G.number_of_edges()

# Run the rewiring on four workers; tqdm tracks how many jobs were dispatched.
output = Parallel(n_jobs=4)(
    delayed(configuration_model)(G, num_edges * 10)
    for _ in tqdm(range(num_networks))
)

In [None]:
# Pull the two assortativity values out of every (graph, degree, attribute) result.
degree_assortativity_config = [result[1] for result in output]
attribute_assortativity_config = [result[2] for result in output]

print(f'lenght of degree_assortativity_config: {len(degree_assortativity_config)}')
print(f'lenght of attribute_assortativity_config: {len(attribute_assortativity_config)}')

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(10, 10))

# One panel per measure: histogram of the rewired networks' values plus a
# dashed line at the value of the original network.
panels = (
    (axs[0], degree_assortativity_config, degree_correlation, 'Degree Assortativity'),
    (axs[1], attribute_assortativity_config, attribute_correlation, 'Attribute Assortativity'),
)
for ax, rewired_values, original_value, panel_title in panels:
    ax.hist(rewired_values, bins=20, alpha=0.5, label='Random model')
    ax.axvline(original_value, color='r', linestyle='dashed', linewidth=2, label='Original network')
    ax.set_xlabel('Assortativity coefficient')
    ax.set_ylabel('Frequency')
    ax.set_title(panel_title)
    ax.legend()

# Adjust layout for better spacing
fig.tight_layout()

plt.show()

### illustrate network and random network

In [None]:
# Pass G, with nodes grouped by their 'company' attribute, to netwulf's
# visualiser and keep the stylized network and configuration it returns.
stylized_network, config = nw.visualize(
    nw.get_filtered_network(G, node_group_key='company')
)
# Draw the stylized network.
nw.draw_netwulf(stylized_network)

In [None]:
# Pass G (no node grouping) to netwulf's visualiser and keep the stylized
# network and configuration it returns.
stylized_network, config = nw.visualize(nw.get_filtered_network(G))
# Draw the stylized network.
nw.draw_netwulf(stylized_network)

In [None]:
# Pass the random network Gr to netwulf's visualiser and keep the stylized
# network and configuration it returns.
stylized_network, config = nw.visualize(nw.get_filtered_network(Gr))
# Draw the stylized network.
nw.draw_netwulf(stylized_network)

### Largest component and shortest path

In [None]:
# largest component of the network
largest_cc = max(nx.connected_components(G), key=len)
largest_cc_random = max(nx.connected_components(Gr), key=len)

# Create subgraphs
largest_cc_subgraph = G.subgraph(largest_cc)
largest_cc_random_subgraph = Gr.subgraph(largest_cc_random)

print(f'The largest connected component of the github collaboration network has {len(largest_cc)} nodes')
print(f'The largest connected component of the Random Network has {len(largest_cc_random)} nodes')

# see avarage shortest path
shortest_path = nx.average_shortest_path_length(largest_cc_subgraph)
shortest_path_random = nx.average_shortest_path_length(largest_cc_random_subgraph)

print(f'The average shortest path length of the github collaboration network is {shortest_path}')
print(f'The average shortest path length of the Random Network is {shortest_path_random}')

### Network analysis 

### Basic analysis

In [None]:
# remove bot node and edge from the network (node with most degree)
GB = G.copy()
GB.remove_node(max_degree)

# Do the same analysis
num_nodes = GB.number_of_nodes()
num_edges = GB.number_of_edges()
density = nx.density(GB)
is_connected = nx.is_connected(GB)
num_connected_components = nx.number_connected_components(GB)
num_isolated_nodes = nx.number_of_isolates(GB)
largest_cc = max(nx.connected_components(GB), key=len)

print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")
print(f"Density: {density}")
print(f"Is the network fully connected? {'Yes' if is_connected else 'No'}")
print(f"Number of connected components: {num_connected_components}")
print(f"Number of isolated nodes: {num_isolated_nodes}")
print(f"largest connected component: {len(largest_cc)}")

# Compute the propability p for the Computational Social Scientists network
num_nodes = len(GB.nodes)
num_edges = len(GB.edges)
p = num_edges / (num_nodes * (num_nodes - 1) / 2)
print(f'number of nodes: {num_nodes} and number of edges: {num_edges}')
print(f'The edge probability for the Computational Social Scientists network is {p}')

Grb = nx.erdos_renyi_graph(num_nodes, p)

degree = dict(GB.degree())
degree_randomrm = dict(Grb.degree())

degree_list_real = list(degree.values())
degree_list_random = list(degree_randomrm.values())

In [None]:
# Mean degree of the bot-free graph and of its random counterpart.
avarage_degree = np.mean(list(degree.values()))
avarage_degree_randomrm = np.mean(list(degree_randomrm.values()))

print(f'The average degree of github collabaration network {avarage_degree}')
print(f'The average degree of the Random Network is {avarage_degree_randomrm}')

# Node holding the largest degree in each network.
max_degree = max(degree, key=lambda node: degree[node])
max_degree_randomrm = max(degree_randomrm, key=lambda node: degree_randomrm[node])

print(f'The node with the highest degree in the github collabaration network is {max_degree} with a degree of {degree[max_degree]}')
print(f'The node with the highest degree in the Random Network is {max_degree_randomrm} with a degree of {degree_randomrm[max_degree_randomrm]}')

### Illustrate distribution

In [None]:
# Overlay the normalised degree histograms of both networks on a single axes.
fig, ax = plt.subplots()
ax.hist(degree_list_random, bins=50, density=True, alpha=0.7, label='Random Network')
ax.hist(degree_list_real, bins=5*10**2, density=True, alpha=0.7, label='Real Network')

# Dashed vertical lines mark the mean degree of each network.
ax.axvline(np.mean(degree_list_random), color='r', linestyle='--', label='Average degree (random network)')
ax.axvline(np.mean(degree_list_real), color='b', linestyle='--', label='Average degree (real network)')

# Logarithmic x-axis with integer tick labels.
ax.set_xscale('log')
ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%d'))

# The x-axis runs from just below 1 to the largest degree found in either network.
ax.set_xlim([0.9, max(degree_list_random + degree_list_real)])

ax.legend()
ax.set_title('Degree Distributions - Graph without Bot Node')
ax.set_xlabel('Degree')
ax.set_ylabel('Frequency')
fig.savefig('degree_distribution.png')
plt.show()

### clustering coefficient

In [None]:
# Average clustering of the bot-free network and of its random counterpart.
# This cell previously measured G and Gr (bot included) while sizing the 100
# random graphs below with `num_nodes` and `p`, which at this point describe
# GB; everything now refers to the bot-free graph, like the rest of this section.
# NOTE(review): confirm the bot-free graph is the intended subject here.
clustering_coefficient = nx.average_clustering(GB)
clustering_coefficient_random = nx.average_clustering(Grb)

print(f'The clustering coefficient of the github collaboration network is {clustering_coefficient}')
print(f'The clustering coefficient of the Random Network is {clustering_coefficient_random}')

# Size and edge probability are taken from GB directly, so the cell does not
# depend on which cell last assigned the globals `num_nodes` and `p`.
n_without_bot = GB.number_of_nodes()
p_without_bot = nx.density(GB)

random_clustering = []
for i in tqdm(range(100)):
    # Generate a random network; seeding with the loop index makes the 100
    # graphs distinct yet identical on every run.
    GrCluster = nx.erdos_renyi_graph(n_without_bot, p_without_bot, seed=i)
    random_clustering.append(nx.average_clustering(GrCluster))

print(f'The average clustering coefficient of the Random Network is {np.mean(random_clustering)}')
print(f'The standard deviation of the clustering coefficient of the Random Network is {np.std(random_clustering)}')

### Assortativity

In [None]:
# Assortativity of the bot-free graph by node degree and by the 'company' attribute.
degree_correlation, attribute_correlation = (
    nx.degree_assortativity_coefficient(GB),
    nx.attribute_assortativity_coefficient(GB, 'company'),
)

print(f'Degree Assortativity: {degree_correlation}')
print(f'Attribute Assortativity: {attribute_correlation}')

In [None]:
# Size the swap budget from the graph that is actually rewired (GB); it was
# previously taken from G, which still contains the bot node's edges.
num_edges = GB.number_of_edges()
num_networks = 100

# Rewire 100 copies of GB on four workers, ten swaps per edge; tqdm tracks
# how many jobs were dispatched.
output = Parallel(n_jobs=4)(delayed(configuration_model)(GB, num_edges * 10) for _ in tqdm(range(num_networks)))

In [None]:
# Pull the two assortativity values out of every (graph, degree, attribute) result.
degree_assortativity_config_B = [result[1] for result in output]
attribute_assortativity_config_B = [result[2] for result in output]

print(f'lenght of degree_assortativity_config: {len(degree_assortativity_config_B)}')
print(f'lenght of attribute_assortativity_config: {len(attribute_assortativity_config_B)}')


fig, axs = plt.subplots(1, 2, figsize=(15, 5))

fig.suptitle('Degree and Attribute Assortativity - Graph without Bot Node', fontsize=16, weight='bold')

# One panel per measure: histogram of the rewired networks' values plus a
# dashed line at the value of the original (bot-free) network.
panels = (
    (axs[0], degree_assortativity_config_B, degree_correlation,
     'Degree Assortativity - Graph without Bot Node'),
    (axs[1], attribute_assortativity_config_B, attribute_correlation,
     'Attribute Assortativity - Graph without Bot Node'),
)
for ax, rewired_values, original_value, panel_title in panels:
    ax.hist(rewired_values, bins=20, alpha=0.5, label='Random model')
    ax.axvline(original_value, color='r', linestyle='dashed', linewidth=2, label='Original network')
    ax.set_xlabel('Assortativity coefficient')
    ax.set_ylabel('Frequency')
    ax.set_title(panel_title, fontsize=12, weight='bold')
    ax.legend()

# Adjust layout for better spacing, then save and show the figure.
fig.tight_layout()
fig.savefig('degree_attribute_assortativity.png')
plt.show()


### largest component

In [None]:
# largest component of the network
largest_cc = max(nx.connected_components(GB), key=len)
largest_cc_random = max(nx.connected_components(Grb), key=len)

# Create subgraphs
largest_cc_subgraph = GB.subgraph(largest_cc)
largest_cc_random_subgraph = Grb.subgraph(largest_cc_random)

print(f'The largest connected component of the github collaboration network has {len(largest_cc)} nodes')
print(f'The largest connected component of the Random Network has {len(largest_cc_random)} nodes')

# see avarage shortest path
shortest_path = nx.average_shortest_path_length(largest_cc_subgraph)
shortest_path_random = nx.average_shortest_path_length(largest_cc_random_subgraph)

print(f'The average shortest path length of the github collaboration network is {shortest_path}')
print(f'The average shortest path length of the Random Network is {shortest_path_random}')

### plot network

In [None]:
# Pass the bot-free graph GB, with nodes grouped by their 'company' attribute,
# to netwulf's visualiser and keep the stylized network and configuration it returns.
stylized_network, config = nw.visualize(
    nw.get_filtered_network(GB, node_group_key='company')
)
# Draw the stylized network.
nw.draw_netwulf(stylized_network)