<a href="https://colab.research.google.com/github/Yasserashraf1/AI-Salaries-Analysis-Project-Using-R-and-Tableau/blob/main/SNP_20221370967.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

### Sample Graph

In [None]:
# Location of the edge-list CSV. Kept in one named constant so it is easy to
# repoint when the notebook runs on another machine.
DATA_PATH = "D:\\Learning\\FCDS\\Social Networks\\Project\\soc-sign-bitcoinotc.csv"

# The file has no header row: with the default header handling pandas used the
# first data record as column labels and silently dropped that edge.
# header=None keeps every record; the names below are labels chosen here.
# NOTE(review): the fourth column looks like a Unix timestamp - confirm.
df = pd.read_csv(
    DATA_PATH,
    header=None,
    names=["source", "target", "weight", "timestamp"],
)

# Column roles are taken by position: source node, target node, edge weight.
source_col, target_col, weight_col = df.columns[:3]

print("Preview:")
display(df.head())


Preview:


Unnamed: 0,6,2,4,1289241911.72836
0,6,5,2,1289242000.0
1,1,15,1,1289243000.0
2,4,3,7,1289245000.0
3,13,16,8,1289254000.0
4,13,10,8,1289254000.0


In [None]:
# Undirected graph. Use nx.DiGraph() instead to keep edge direction, but note
# that the connectivity cell below calls nx.is_connected, which only accepts
# undirected graphs.
G = nx.Graph()

# Insert all edges in one call, reading the three columns directly instead of
# materialising a Series per row with iterrows(). Node ids are cast to int and
# weights to float, exactly as before. Because the graph is undirected, a
# later row for the same pair of nodes overwrites the earlier weight.
G.add_weighted_edges_from(
    (int(u), int(v), float(w))
    for u, v, w in zip(df[source_col], df[target_col], df[weight_col])
)

print("Graph loaded!")
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")


Graph loaded!
Number of nodes: 5881
Number of edges: 21492


## Node Degree

In [None]:
# Map every node to its degree; the average-degree cell reuses this dict.
degree_dict = dict(G.degree())

# Build the node/degree table, then order it from highest to lowest degree.
node_ids = list(degree_dict)
node_degrees = list(degree_dict.values())
deg_df = pd.DataFrame({"node": node_ids, "degree": node_degrees})
deg_df = deg_df.sort_values(by="degree", ascending=False)

print("=== Top 100 Highest Degree Nodes ===")
display(deg_df.head(100))

# The table is sorted in descending order, so its last rows hold the
# smallest degrees.
print("\n=== Top 100 Lowest Degree Nodes ===")
display(deg_df.tail(100))


=== Top 100 Highest Degree Nodes ===


Unnamed: 0,node,degree
23,35,795
1785,1810,439
2571,2642,438
2071,2125,436
1980,2028,326
...,...,...
401,468,63
4733,4833,63
727,775,62
237,280,62



=== Top 100 Lowest Degree Nodes ===


Unnamed: 0,node,degree
5730,5855,1
5731,5817,1
5732,5856,1
5733,5857,1
3325,3434,1
...,...,...
3364,3471,1
5789,5915,1
5790,5916,1
5792,5918,1


## Average Degree

In [None]:
# Mean degree = total of all node degrees divided by the node count.
degree_total = sum(degree_dict.values())
node_count = G.number_of_nodes()
avg_degree = degree_total / node_count

print("=== Average Degree ===")
print(f"Average degree: {avg_degree:.4f}")


=== Average Degree ===
Average degree: 7.3090


## Graph Connectivity

In [None]:
print("=== Connectivity ===")

# The per-component breakdown is only computed when the graph turns out to be
# split into more than one piece.
if not nx.is_connected(G):
    print("Graph is NOT connected.")
    # Components as node sets, ordered largest first.
    comps = list(nx.connected_components(G))
    comps.sort(key=len, reverse=True)
    print(f"Number of connected components: {len(comps)}")
    print(f"Largest component size: {len(comps[0])}")
else:
    print("Graph is connected.")


=== Connectivity ===
Graph is NOT connected.
Number of connected components: 4
Largest component size: 5875


## Density

In [None]:
# Density of the graph as reported by networkx.
density = nx.density(G)

# Header and value are emitted on separate lines by a single print call.
print("=== Graph Density ===", f"Density: {density:.6f}", sep="\n")


=== Graph Density ===
Density: 0.001243


## Clustering Coefficient + Top 100

In [None]:
# Per-node clustering coefficient, keyed by node id.
clustering = nx.clustering(G)

# Tabulate the coefficients and order them from highest to lowest.
clustered_nodes = list(clustering)
coefficients = list(clustering.values())
clust_df = pd.DataFrame({"node": clustered_nodes, "clustering": coefficients})
clust_df = clust_df.sort_values(by="clustering", ascending=False)

print("=== Top 100 Highest Clustering ===")
display(clust_df.head(100))

# Descending order means the final rows carry the smallest coefficients.
print("=== Top 100 Lowest Clustering ===")
display(clust_df.tail(100))


=== Top 100 Highest Clustering ===


Unnamed: 0,node,clustering
1572,1621,1.0
5773,5900,1.0
5794,5920,1.0
5869,5994,1.0
5865,5956,1.0
...,...,...
1685,1741,1.0
1687,1720,1.0
1768,1823,1.0
1810,1861,1.0


=== Top 100 Lowest Clustering ===


Unnamed: 0,node,clustering
67,97,0.0
5709,5832,0.0
5710,5833,0.0
5711,5835,0.0
5785,5911,0.0
...,...,...
5874,5998,0.0
5875,5999,0.0
5876,6000,0.0
5877,6002,0.0


## Betweenness Centrality (approximate, sampled from 300 nodes)

In [None]:
# Approximate betweenness: k=300 nodes are sampled rather than using every
# node, and the fixed seed makes that sample repeatable between runs.
bet = nx.betweenness_centrality(G, k=300, normalized=True, seed=42)

# Tabulate the scores and order them from highest to lowest.
scored_nodes = list(bet)
scores = list(bet.values())
bet_df = pd.DataFrame({"node": scored_nodes, "betweenness": scores})
bet_df = bet_df.sort_values(by="betweenness", ascending=False)

print("=== Top 100 Highest Betweenness ===")
display(bet_df.head(100))

# Descending order means the final rows carry the smallest scores.
print("\n=== Top 100 Lowest Betweenness ===")
display(bet_df.tail(100))


=== Top 100 Highest Betweenness ===


Unnamed: 0,node,betweenness
23,35,0.186848
2071,2125,0.077039
1785,1810,0.060125
3032,3129,0.056694
2,1,0.056505
...,...,...
4444,4554,0.005379
4521,4649,0.005250
155,198,0.005228
675,726,0.005226



=== Top 100 Lowest Betweenness ===


Unnamed: 0,node,betweenness
5722,5848,0.0
5723,5849,0.0
5725,5850,0.0
5728,5853,0.0
5782,5906,0.0
...,...,...
5794,5920,0.0
37,55,0.0
40,65,0.0
42,66,0.0


## Eigenvector Centrality

In [None]:
# Eigenvector centrality per node; the solver may run up to 2000 iterations.
eigen = nx.eigenvector_centrality(G, max_iter=2000)

# Tabulate the scores and order them from highest to lowest.
ranked_nodes = list(eigen)
centralities = list(eigen.values())
eig_df = pd.DataFrame({"node": ranked_nodes, "eigenvector": centralities})
eig_df = eig_df.sort_values(by="eigenvector", ascending=False)

print("=== Top 100 Most Influential Nodes (Eigenvector) ===")
display(eig_df.head(100))


=== Top 100 Most Influential Nodes (Eigenvector) ===


Unnamed: 0,node,eigenvector
945,905,0.188653
1785,1810,0.182641
2571,2642,0.175077
1980,2028,0.154839
2071,2125,0.148704
...,...,...
1497,1543,0.045915
1796,1656,0.045656
159,202,0.045626
1951,1815,0.044458


## Clustering Coefficient

In [None]:
print("=== CLUSTERING COEFFICIENT ANALYSIS ===\n")

# Per-node clustering coefficient, keyed by node id.
clustering = nx.clustering(G)

# Unsorted node/coefficient table; this is the frame the export cell writes.
clust_df = pd.DataFrame({
    "node": list(clustering),
    "clustering_coefficient": list(clustering.values()),
})

# Two ranked views of the same table: best-first and worst-first.
clust_sorted_high = clust_df.sort_values(by="clustering_coefficient", ascending=False)
clust_sorted_low = clust_df.sort_values(by="clustering_coefficient", ascending=True)

# Summary statistics over all nodes.
coefficient_column = clust_df["clustering_coefficient"]
avg_cl = coefficient_column.mean()
max_cl = coefficient_column.max()
min_cl = coefficient_column.min()

print(f"Number of nodes analyzed: {len(clust_df)}")
print(f"Average clustering coefficient: {avg_cl:.6f}")
print(f"Maximum clustering coefficient: {max_cl:.6f}")
print(f"Minimum clustering coefficient: {min_cl:.6f}")

print("\n--- Top 100 Nodes with Highest Clustering Coefficient ---")
display(clust_sorted_high.head(100))

print("\n--- Top 100 Nodes with Lowest Clustering Coefficient ---")
display(clust_sorted_low.head(100))


=== CLUSTERING COEFFICIENT ANALYSIS ===

Number of nodes analyzed: 5881
Average clustering coefficient: 0.177504
Maximum clustering coefficient: 1.000000
Minimum clustering coefficient: 0.000000

--- Top 100 Nodes with Highest Clustering Coefficient ---


Unnamed: 0,node,clustering_coefficient
1572,1621,1.0
5773,5900,1.0
5794,5920,1.0
5869,5994,1.0
5865,5956,1.0
...,...,...
1685,1741,1.0
1687,1720,1.0
1768,1823,1.0
1810,1861,1.0



--- Top 100 Nodes with Lowest Clustering Coefficient ---


Unnamed: 0,node,clustering_coefficient
5861,5986,0.0
5832,5961,0.0
5833,5962,0.0
5834,5963,0.0
5835,5953,0.0
...,...,...
4682,4068,0.0
5789,5915,0.0
5790,5916,0.0
5792,5918,0.0


## Stats in CSV

In [None]:
# Folder that receives the exported statistics tables.
output_path = r"D:\Learning\FCDS\Social Networks\Project"

# Build paths with pathlib instead of hand-written "\\" concatenation, and
# create the folder first so the export does not fail when it is missing.
output_dir = Path(output_path)
output_dir.mkdir(parents=True, exist_ok=True)

# Output file name -> table to write. Row indices are omitted from the files.
stats_tables = {
    "degree_stats.csv": deg_df,
    "clustering_stats.csv": clust_df,
    "betweenness_stats.csv": bet_df,
    "eigen_stats.csv": eig_df,
}
for file_name, table in stats_tables.items():
    table.to_csv(output_dir / file_name, index=False)

print("Saved all stats to CSV files on D drive.")


Saved all stats to CSV files on D drive.
