<a href="https://colab.research.google.com/github/Yasserashraf1/AI-Salaries-Analysis-Project-Using-R-and-Tableau/blob/main/project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

## Sample Graph

In [None]:
# Column names assigned to the header-less edge list: one row per directed
# edge (source -> target) with a weight and a time value.
source_col = "source"
target_col = "target"
weight_col = "weight"
time_col = "time"

# Directory that contains the dataset; edit this before running.
input_path = r"Put Your Path Here"

# Join with pathlib instead of a hard-coded "\\": on Linux/macOS/Colab a
# backslash is part of the file name rather than a separator, so the old
# f-string only resolved correctly on Windows.
csv_file = Path(input_path) / "soc-sign-bitcoinotc.csv"
df = pd.read_csv(
    csv_file,
    header=None,
    names=[source_col, target_col, weight_col, time_col],
)

print("Preview:")
display(df.head())

Preview:


Unnamed: 0,source,target,weight,time
0,6,2,4,1289242000.0
1,6,5,2,1289242000.0
2,1,15,1,1289243000.0
3,4,3,7,1289245000.0
4,13,16,8,1289254000.0


In [None]:
# Directed graph with one weighted edge per row of `df`.
G = nx.DiGraph()

# Walk the three columns in parallel instead of `df.iterrows()`: iterrows
# materialises a Series for every row, which is needlessly slow here.
# Edges are inserted in row order, so node order is unchanged, and a repeated
# (source, target) pair overwrites the earlier weight just as add_edge would.
G.add_weighted_edges_from(
    (int(u), int(v), float(w))
    for u, v, w in zip(df[source_col], df[target_col], df[weight_col])
)

print("Graph loaded!")
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")


Graph loaded!
Number of nodes: 5881
Number of edges: 35592


## Node Analysis

In [None]:
# Weighted degrees: each value is the sum of the "weight" attribute over a
# node's incoming / outgoing edges, so totals can be negative.
in_deg = dict(G.in_degree(weight='weight'))
out_deg = dict(G.out_degree(weight='weight'))

# Look both dictionaries up by the same node list so the columns stay aligned.
nodes = list(in_deg)
deg_df = (
    pd.DataFrame({
        "node": nodes,
        "in degree": [in_deg[node] for node in nodes],
        "out degree": [out_deg[node] for node in nodes],
    })
    .assign(**{"total degree": lambda frame: frame["in degree"] + frame["out degree"]})
    .sort_values("total degree", ascending=False)
)

# Unweighted figure: mean number of incoming edges per node.
avg_deg = sum(count for _, count in G.in_degree()) / G.number_of_nodes()

print("=== Top 10 Highest Degree Nodes ===")
display(deg_df.head(10))

# The frame is sorted in descending order, so the tail holds the lowest totals.
print("\n=== Top 10 Lowest Degree Nodes ===")
display(deg_df.tail(10))

print("Average degree:", avg_deg)


=== Top 10 Highest Degree Nodes ===


Unnamed: 0,node,in degree,out degree,total degree
23,35,1016.0,874.0,1890.0
2571,2642,1041.0,764.0,1805.0
3,1,801.0,433.0,1234.0
10,7,614.0,511.0,1125.0
967,1018,471.0,304.0,775.0
4169,4291,360.0,337.0,697.0
4089,4197,416.0,261.0,677.0
7,13,341.0,286.0,627.0
1980,2028,202.0,376.0,578.0
1350,1396,237.0,281.0,518.0



=== Top 10 Lowest Degree Nodes ===


Unnamed: 0,node,in degree,out degree,total degree
3693,3791,-60.0,-280.0,-340.0
3691,3792,-60.0,-280.0,-340.0
3686,3786,-60.0,-280.0,-340.0
2349,2351,6.0,-348.0,-342.0
3689,3789,-60.0,-290.0,-350.0
2692,2691,4.0,-377.0,-373.0
2206,2266,26.0,-534.0,-508.0
2071,2125,439.0,-999.0,-560.0
3647,3744,-675.0,80.0,-595.0
1785,1810,230.0,-936.0,-706.0


Average degree: 6.052031967352491


## Special Graphs

In [None]:
# A directed edge is reciprocal when the reverse edge also exists, so a mutual
# pair contributes two entries to the list (one per direction).
reciprocal_edges = [
    (src, dst)
    for src, dst in G.edges()
    if G.has_edge(dst, src)
]
print("Number of reciprocal edges:", len(reciprocal_edges))

# Edges that start and end at the same node.
self_loops = [*nx.selfloop_edges(G)]
print("Number of self-loops:", len(self_loops))

Number of reciprocal edges: 28200
Number of self-loops: 0


## Graph Connectivity

In [None]:
# Graph density as reported by networkx; stored in `density` and printed.
density = nx.density(G)
print("Density:", density)

Density: 0.0010292571373048454


In [None]:
# Overall connectivity: check the stronger property first and fall back to the
# weaker one, reporting only the strongest that holds.
is_strong = nx.is_strongly_connected(G)
is_Weak = nx.is_weakly_connected(G)
if is_strong:
    print("Graph is strongly connected.")

elif is_Weak:
    print("Graph is weakly connected.")
else:
    print("Graph isnt connected")

strongly_connected = list(nx.strongly_connected_components(G))
print("Number of strongly connected components:", len(strongly_connected))

weakly_connected = list(nx.weakly_connected_components(G))
print("Number of weakly connected components:", len(weakly_connected))

# Per-component size report. A read-only subgraph view is enough for counting,
# so the `.copy()` that duplicated every component (including the largest one)
# has been dropped; the printed numbers are unchanged.
for i, comp in enumerate(weakly_connected, start=1):
    sub = G.subgraph(comp)

    n = sub.number_of_nodes()
    m = sub.number_of_edges()

    print(f"=== Component {i} ===")
    print(f"Nodes: {n}, Edges: {m}")


Graph isnt connected
Number of strongly connected components: 1144
Number of weakly connected components: 4
=== Component 1 ===
Nodes: 5875, Edges: 35587
=== Component 2 ===
Nodes: 2, Edges: 2
=== Component 3 ===
Nodes: 2, Edges: 2
=== Component 4 ===
Nodes: 2, Edges: 1


## Adjacency Matrix Representation

In [None]:
# Weighted adjacency matrix as a DataFrame: rows and columns are labelled by
# node id and each cell is filled from the edges' "weight" attribute.
# NOTE(review): this looks like a dense n x n frame (n = number of nodes), which
# may be memory-heavy for larger graphs — confirm before scaling up.
adj_matrix = nx.to_pandas_adjacency(G, weight="weight")
print("Weighted adjacency matrix")
display(adj_matrix)

Weighted adjacency matrix


Unnamed: 0,6,2,5,1,15,4,3,13,16,10,...,5996,5992,5997,5998,5999,6000,6002,6003,6004,6005
6,0.0,4.0,2.0,8.0,0.0,2.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5.0,0.0,0.0,8.0,0.0,5.0,8.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,3.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,8.0,8.0,4.0,0.0,1.0,10.0,6.0,3.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Social Structure

In [None]:
# Compute both global measures first, then report them in order.
density = nx.density(G)
reciprocity = nx.reciprocity(G)

for label, value in (("Density:", density), ("Reciprocity:", reciprocity)):
    print(label, value)

Density: 0.0010292571373048454
Reciprocity: 0.7923128792987189


In [None]:
print("=== CLUSTERING COEFFICIENT ANALYSIS ===\n")

# Edge direction is dropped before computing the per-node coefficients.
undirected_G = G.to_undirected()
clustering = nx.clustering(undirected_G)

# One row per node, highest coefficient first.
clust_df = pd.DataFrame({
    "node": list(clustering.keys()),
    "clustering_coefficient": list(clustering.values())
})
clust_df = clust_df.sort_values("clustering_coefficient", ascending=False)

# Summary statistics over the coefficient column.
coefficients = clust_df["clustering_coefficient"]
avg_cl = sum(coefficients) / len(clust_df)
max_cl = coefficients.max()
min_cl = coefficients.min()

print(f"Number of nodes analyzed: {len(clust_df)}")
print(f"Average clustering coefficient: {avg_cl:.6f}")
print(f"Maximum clustering coefficient: {max_cl:.6f}")
print(f"Minimum clustering coefficient: {min_cl:.6f}")

print("\n--- Top 10 Nodes with Highest Clustering Coefficient ---")
display(clust_df.head(10))

# Sorted descending, so the tail holds the smallest coefficients.
print("\n--- Top 10 Nodes with Lowest Clustering Coefficient ---")
display(clust_df.tail(10))


=== CLUSTERING COEFFICIENT ANALYSIS ===

Number of nodes analyzed: 5881
Average clustering coefficient: 0.177504
Maximum clustering coefficient: 1.000000
Minimum clustering coefficient: 0.000000

--- Top 10 Nodes with Highest Clustering Coefficient ---


Unnamed: 0,node,clustering_coefficient
2,5,1.0
13,8,1.0
1092,1141,1.0
1093,1142,1.0
1095,1144,1.0
1107,945,1.0
1127,1174,1.0
1170,1218,1.0
1175,1223,1.0
1177,1225,1.0



--- Top 10 Nodes with Lowest Clustering Coefficient ---


Unnamed: 0,node,clustering_coefficient
5840,5968,0.0
30,46,0.0
5871,5996,0.0
5870,5995,0.0
5868,5993,0.0
5867,5991,0.0
5866,5990,0.0
5864,5989,0.0
5863,5988,0.0
5844,5970,0.0


## Centrality Measures

In [None]:
def _ranked_frame(scores, column):
    """Return a node/score DataFrame sorted from highest to lowest score."""
    frame = pd.DataFrame({
        "node": list(scores.keys()),
        column: list(scores.values())
    })
    return frame.sort_values(column, ascending=False)


# Degree centrality
deg_centrality = nx.degree_centrality(G)
deg_cen_df = _ranked_frame(deg_centrality, "degree centrality")

print("=== Top 10 Highest Degree Centrality ===")
display(deg_cen_df.head(10))

print("\n=== Top 10 Lowest Degree Centrality ===")
display(deg_cen_df.tail(10))


# Betweenness centrality estimated from k=300 sampled nodes; the fixed seed
# keeps the sample, and therefore the scores, the same on every run.
bet = nx.betweenness_centrality(G, k=300,  normalized=True, seed=42)
bet_df = _ranked_frame(bet, "betweenness")

print("=== Top 10 Highest Betweenness ===")
display(bet_df.head(10))

print("\n=== Top 10 Lowest Betweenness ===")
display(bet_df.tail(10))


=== Top 10 Highest Degree Centrality ===


Unnamed: 0,node,degree centrality
23,35,0.220748
2571,2642,0.139116
1785,1810,0.121599
2071,2125,0.098129
1980,2028,0.097279
945,905,0.089796
4071,4172,0.082653
10,7,0.07619
3,1,0.075
4089,4197,0.068878



=== Top 10 Lowest Degree Centrality ===


Unnamed: 0,node,degree centrality
5008,5106,0.00017
883,936,0.00017
5002,5099,0.00017
1415,1460,0.00017
1414,1459,0.00017
5017,5117,0.00017
5015,5083,0.00017
5014,5113,0.00017
5013,5112,0.00017
5012,5111,0.00017


=== Top 10 Highest Betweenness ===


Unnamed: 0,node,betweenness
23,35,0.131205
2571,2642,0.054284
945,905,0.051233
1785,1810,0.046147
3,1,0.043697
2071,2125,0.041671
4071,4172,0.03668
10,7,0.033462
1980,2028,0.029944
1899,1953,0.028191



=== Top 10 Lowest Betweenness ===


Unnamed: 0,node,betweenness
5855,5980,0.0
5872,5992,0.0
5873,5997,0.0
5874,5998,0.0
5875,5999,0.0
5876,6000,0.0
5877,6002,0.0
5878,6003,0.0
5879,6004,0.0
5846,5960,0.0


## Eigenvector Centrality

In [None]:
# Eigenvector centrality, allowing up to 2000 iterations for convergence.
eigen = nx.eigenvector_centrality(G, max_iter=2000)

# One row per node, most central first.
eig_df = pd.DataFrame({
    "node": list(eigen),
    "eigenvector": [eigen[node] for node in eigen],
})
eig_df = eig_df.sort_values("eigenvector", ascending=False)

print("=== Top 10 Most Influential Nodes (Eigenvector) ===")
display(eig_df.head(10))

=== Top 10 Most Influential Nodes (Eigenvector) ===


Unnamed: 0,node,eigenvector
2571,2642,0.202195
945,905,0.190131
1785,1810,0.16982
23,35,0.154773
1980,2028,0.150092
4071,4172,0.146302
3,1,0.135433
4169,4291,0.135274
1294,1334,0.131389
4089,4197,0.127946


## Stats in CSV

In [None]:
# Directory the result tables are written to; edit this before running.
output_path = r"Put Your Path Here"

# Build each target with pathlib instead of a hard-coded "\\": on
# Linux/macOS/Colab the backslash would become part of the file name and the
# CSVs would land in the working directory under a mangled name.
output_dir = Path(output_path)

# File name -> frame to export; every table is written without its index.
exports = {
    "degree_stats.csv": deg_df,
    "clustering_stats.csv": clust_df,
    "degree_Centrality_stats.csv": deg_cen_df,
    "betweenness_stats.csv": bet_df,
    "eigen_stats.csv": eig_df,
}
for file_name, frame in exports.items():
    frame.to_csv(output_dir / file_name, index=False)
