<a href="https://colab.research.google.com/github/hernanmorales-navarrete/DataAnalysisWithPython/blob/main/Network_Analysis_with_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Network Analysis with Python


- This interactive Jupyter Notebook provides a hands-on guide to Social Network Analysis using Python and NetworkX. Through real examples and exercises, students will explore graph theory concepts, data importation, network visualization, centrality measures, structural analysis, path analysis, resilience assessment, and community detection. This notebook engages students in understanding the relationships and patterns within GitHub organizations' networks, fostering skills in data transformation, algorithm implementation, and advanced network analytics.

## Import necessary libraries


In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import random
from networkx.algorithms import bipartite
from networkx.algorithms.centrality import degree_centrality, closeness_centrality, betweenness_centrality, eigenvector_centrality


## 1. Load the dataset

In [None]:
# Load the dataset from CSV and drop any rows where the member is missing.

full_data = pd.read_csv('/content/data_nets.csv')  # Adjust the path to your dataset
full_data = full_data.dropna(subset=['member'])  # Remove rows where member is missing

# Randomly select up to 100 rows, with a fixed seed for reproducibility.
# DataFrame.sample raises ValueError when n exceeds the number of rows
# (sampling without replacement), so the sample size is capped at the
# number of rows that are actually available.
sample_size = min(100, len(full_data))
data = full_data.sample(n=sample_size, random_state=2)


display(data)

## 2. Create a Bipartite Graph

In [None]:
# Bipartite Graph Creation: organisations form one node set (bipartite=0)
# and members form the other (bipartite=1).
B = nx.Graph()
B.add_nodes_from(data['Organisation'].unique(), bipartite=0)  # Organisation nodes
B.add_nodes_from(data['member'].unique(), bipartite=1)  # Member nodes

# Add one edge per row, linking each organisation to its member.
# Zipping the two columns avoids building a Series for every row, which is
# what iterrows() does; the nodes already exist, so their 'bipartite'
# attribute is left untouched.
B.add_edges_from(zip(data['Organisation'], data['member']))


## 3. Visualize the Network


In [None]:
# Use NetworkX's draw function to visualize the network.

plt.figure(figsize=(25, 25))
# Compute node positions with the force-directed spring layout.
# The layout starts from random positions, so a fixed seed is passed to make
# the figure identical on every run (the notebook seeds its other random steps too).
# `pos` is reused by the community visualization cell.
pos = nx.spring_layout(B, seed=42)
# Draw the graph at those positions with small nodes and small labels
nx.draw(B, pos, with_labels=True, node_size=10, font_size=5)
# Add a title to the plot for context
plt.title("GitHub Organizations Network")
plt.show()


## 4. Structural Analysis and Properties

In [None]:
# Degree centrality: maps every node of B to its normalized degree.
deg_centrality = nx.degree_centrality(B)
print("Degree Centrality:", deg_centrality)

In [None]:
# Closeness centrality: maps every node of B to a score based on its
# shortest-path distances to the nodes it can reach.
closeness = nx.closeness_centrality(B)
print("Closeness Centrality:", closeness)

In [None]:
# Betweenness centrality: maps every node of B to the share of shortest
# paths between other node pairs that pass through it.
betweenness = nx.betweenness_centrality(B)
print("Betweenness Centrality:", betweenness)

In [None]:
# Calculate Eigenvector Centrality
# The power iteration stops as soon as the tolerance is met, so a larger
# iteration budget does not change results that already converge. It only
# avoids PowerIterationFailedConvergence on sparse graphs made of several
# similarly sized components, where 100 iterations (the default) can be too few.
eigenvector = eigenvector_centrality(B, max_iter=1000)
print("Eigenvector Centrality:", eigenvector)

In [None]:
# Connected components: nx.connected_components yields one set of nodes per
# component; materialize them into a list so they can be printed and reused.
component_iter = nx.connected_components(B)
connected_components = list(component_iter)
print("Connected Components:", connected_components)


## 6. Community Detection

In [None]:
# Use algorithms like Girvan-Newman or Louvain for community detection
from networkx.algorithms.community import girvan_newman

# girvan_newman returns an iterator over successively finer partitions;
# next() takes only the first one it produces.
communities = next(girvan_newman(B))
print("Detected Communities:", communities)
# This line prints a count, so it gets its own label instead of repeating the one above
print("Number of Detected Communities:", len(communities))

In [None]:
# Plot each detected community in its own colour, reusing the layout `pos`
# computed when the full network was drawn.
plt.figure(figsize=(25, 25))
colors = ['red', 'blue', 'green', 'yellow']
palette_size = len(colors)
for idx, members in enumerate(communities):
    # Cycle through the palette when there are more communities than colours
    nx.draw_networkx_nodes(
        B,
        pos,
        nodelist=list(members),
        node_color=colors[idx % palette_size],
        node_size=20,
    )
# Edges are drawn once, semi-transparent, on top of the coloured nodes
nx.draw_networkx_edges(B, pos, alpha=0.5)
plt.title("Communities in GitHub Organizations Network")
plt.show()


## 5. Resilience and Robustness

In [None]:
# Baseline for the resilience analysis: count the connected components of the
# intact graph so later removals can be compared against it.
original_components = len(list(nx.connected_components(B)))
print("Original number of connected components:", original_components)

In [None]:
# Remove a single sample node (the first member in `data`) from a copy of
# the graph and recount the connected components.
sample_member = data['member'].iloc[0]
B1 = B.copy()  # Work on a copy so B itself stays intact
B1.remove_node(sample_member)
new_components = nx.number_connected_components(B1)
print("New number of connected components after removal:", new_components)


In [None]:
# Robustness analysis: remove randomly chosen organisation nodes one at a time
# and track how the number of connected components changes.

random.seed(10)  # Fixed seed so the same nodes are picked on every run
num_nodes_to_remove = 10

# Make a copy of the original graph to preserve it
B_copy = B.copy()

# Initial number of connected components in the copied graph before any removals
initial_components = nx.number_connected_components(B_copy)
print("Initial number of connected components:", initial_components)

# Select random nodes from the 'Organisation' set to remove (missing values excluded).
# random.sample raises ValueError when asked for more items than the population
# holds, so the request is capped at the number of available organisations.
candidate_nodes = list(data['Organisation'].dropna().unique())
random_nodes = random.sample(candidate_nodes, min(num_nodes_to_remove, len(candidate_nodes)))

# Start from the initial count so the summary below is defined even if no node is removed
current_components = initial_components

# Remove each node one by one from the copied graph and track the number of connected components
for i, node in enumerate(random_nodes, start=1):
    B_copy.remove_node(node)  # Remove the node from the copy
    current_components = nx.number_connected_components(B_copy)  # Count connected components after removal
    print(f"After removing {i} node(s), number of connected components:", current_components)

# Summary: measure the change in the number of connected components
print("Total increase in connected components after removals:", current_components - initial_components)



plt.figure(figsize=(25, 25))
# Compute positions for the pruned graph with the spring layout; the seed keeps
# the figure identical across runs.
# NOTE: this rebinds `pos`, so it no longer contains the removed nodes. Re-run
# the full-network layout cell before re-running any earlier plot that uses `pos`.
pos = nx.spring_layout(B_copy, seed=42)
# Draw the graph with specified positions, enabling labels for nodes, and setting node and font sizes
nx.draw(B_copy, pos, with_labels=True, node_size=10, font_size=5)
# Add a title to the plot for context
plt.title("GitHub Organizations Network")
plt.show()