# Node-Level Measures

## Degree Centrality

In [18]:
import zipfile
import os
import networkx as nx

# Directory holding the dataset files. The ZIP-extraction step that used to
# precede this listing is disabled; files are read from this folder as-is.
extract_path = r"C:\Users\utente\Documents\GitHub\SNA_25\dataset"

# Recursively gather the full path of every file below the dataset directory.
extracted_files = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(extract_path)
    for filename in filenames
]

# Display the collected paths to verify the directory contents.
extracted_files


['C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t1.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_1.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_10.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_2.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_3.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_4.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_5.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_6.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_7.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_8.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t11_9.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t2.csv',
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Edges_t6.csv',
 'C:\\Users\\utente\\Documents\\G

In [19]:
import pandas as pd

# Load all node files and check for missing values.
# The "Nodes" test is applied to the file name only, not the full path, so a
# parent directory whose name happens to contain "Nodes" cannot pull every
# file (edge files included) into the list.
node_files = [f for f in extracted_files if "Nodes" in os.path.basename(f)]

# Maps node-file path -> Series of null counts, restricted to the columns
# that actually contain at least one missing value.
missing_info_summary = {}

for file in node_files:
    df = pd.read_csv(file)
    # Number of null cells per column.
    missing_counts = df.isnull().sum()
    # Only record files that have at least one missing value somewhere.
    if missing_counts.any():
        missing_info_summary[file] = missing_counts[missing_counts > 0]

# Last expression of the cell: display the summary.
missing_info_summary


{'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Nodes_t1.csv': prosocial    54
 crttotal     78
 dtype: int64,
 'C:\\Users\\utente\\Documents\\GitHub\\SNA_25\\dataset\\Nodes_t2.csv': prosocial    35
 crttotal     36
 dtype: int64}

In [20]:
# Inspect a sample node file to check column names.
# The sample is the first entry of node_files (built in an earlier cell);
# indexing raises IndexError if that list is empty.
sample_node_file = node_files[0]
sample_node_df = pd.read_csv(sample_node_file)
# Last expression of the cell: display the column Index of the sample file.
sample_node_df.columns


Index(['Unnamed: 0', 'ID', 'Curso', 'Grupo', 'Sexo', 'prosocial', 'crttotal'], dtype='object')

In [22]:
import os
import pandas as pd
import networkx as nx

# NOTE(review): absolute, machine-specific path; consider defining it once in a
# configuration cell, relative to the repository.
extract_path = r"C:\Users\utente\Documents\GitHub\SNA_25\dataset"  # or use "/mnt/data/..." on Linux

# Per-network summary keyed by the identifier taken from the edge-file name
# (e.g. "Edges_t11_3.csv" -> "t11_3").
in_out_centrality_summary = {}

# Edge files that were skipped, mapped to the reason, so that skipped inputs
# are visible instead of silently missing from the summary table.
skipped_edge_files = {}

# Get all edge files in the directory. os.listdir returns entries in arbitrary
# order, so sort to make the row order of the summary reproducible.
extracted_files = sorted(
    os.path.join(extract_path, f)
    for f in os.listdir(extract_path)
    if f.startswith("Edges_") and f.endswith(".csv")
)

for edge_file in extracted_files:
    edge_name = os.path.basename(edge_file)
    school_id = edge_name.replace("Edges_", "").replace(".csv", "")

    try:
        edge_df = pd.read_csv(edge_file)

        # Build the matching node-file path from the file name only; replacing
        # "Edges" in the whole path would also rewrite a directory component
        # that happens to contain "Edges".
        node_file = os.path.join(
            os.path.dirname(edge_file),
            edge_name.replace("Edges", "Nodes"),
        )
        if not os.path.exists(node_file):
            skipped_edge_files[school_id] = "node file not found: " + node_file
            continue
        node_df = pd.read_csv(node_file)

        # Detect ID column (case-insensitive match on the name "id")
        id_col = next((col for col in node_df.columns if col.lower() == "id"), None)
        if id_col is None:
            skipped_edge_files[school_id] = "no ID column in node file"
            continue

        # Ensure required edge columns are present
        if not {'from', 'to', 'weight'}.issubset(edge_df.columns):
            skipped_edge_files[school_id] = "edge file lacks from/to/weight columns"
            continue

        # Filter positive edges
        positive_edges = edge_df[edge_df['weight'] > 0]

        # Build graph: every listed node is added first so that nodes without
        # any positive edge still count in the centrality normalisation.
        G = nx.DiGraph()
        G.add_nodes_from(node_df[id_col])
        G.add_weighted_edges_from(positive_edges[['from', 'to', 'weight']].values)

        # Compute in-degree and out-degree centrality
        in_deg = nx.in_degree_centrality(G)
        out_deg = nx.out_degree_centrality(G)

        # Compute summaries: the three highest-scoring nodes and the mean.
        # The two means always coincide, because every directed edge adds one
        # in-degree and one out-degree; they are kept as separate columns to
        # preserve the existing table layout.
        top_in = sorted(in_deg.items(), key=lambda x: x[1], reverse=True)[:3]
        top_out = sorted(out_deg.items(), key=lambda x: x[1], reverse=True)[:3]
        mean_in = sum(in_deg.values()) / len(in_deg) if in_deg else 0
        mean_out = sum(out_deg.values()) / len(out_deg) if out_deg else 0

        in_out_centrality_summary[school_id] = {
            "mean_in_degree": mean_in,
            "mean_out_degree": mean_out,
            "top_3_in_degree": top_in,
            "top_3_out_degree": top_out
        }

    except Exception as e:
        # Best effort: a failure on one network is recorded in its own row
        # (under an "error" column) rather than aborting the whole loop.
        in_out_centrality_summary[school_id] = {
            "error": str(e)
        }

# One row per processed network, indexed by its identifier.
in_out_df = pd.DataFrame.from_dict(in_out_centrality_summary, orient='index')
in_out_df


Unnamed: 0,mean_in_degree,mean_out_degree,top_3_in_degree,top_3_out_degree
t1,0.043758,0.043758,"[(463, 0.11764705882352941), (485, 0.102941176...","[(392, 0.34558823529411764), (558, 0.161764705..."
t11_1,0.047257,0.047257,"[(3499, 0.3448275862068966), (3787, 0.16206896...","[(3676, 0.2655172413793103), (3722, 0.23103448..."
t11_10,0.022922,0.022922,"[(5142, 0.27973568281938327), (5235, 0.0660792...","[(5235, 0.16519823788546256), (5216, 0.1343612..."
t11_2,0.017608,0.017608,"[(4118, 0.218978102189781), (4445, 0.058394160...","[(4360, 0.21751824817518248), (4340, 0.1591240..."
t11_3,0.061697,0.061697,"[(4824, 0.211340206185567), (5043, 0.195876288...","[(4835, 0.25257731958762886), (5059, 0.2319587..."
t11_4,0.076401,0.076401,"[(4921, 0.42857142857142855), (4964, 0.1428571...","[(4943, 0.24369747899159663), (4963, 0.2352941..."
t11_5,0.052995,0.052995,"[(3370, 0.22448979591836737), (3484, 0.1306122...","[(3314, 0.1795918367346939), (3311, 0.17142857..."
t11_6,0.08908,0.08908,"[(2537, 0.2222222222222222), (2538, 0.19444444...","[(2530, 0.40277777777777773), (2615, 0.3611111..."
t11_7,0.091486,0.091486,"[(3059, 0.1844660194174757), (3084, 0.18446601...","[(3043, 0.4077669902912621), (3061, 0.34951456..."
t11_8,0.018021,0.018021,"[(2297, 0.048), (2147, 0.042666666666666665), ...","[(2133, 0.17066666666666666), (2114, 0.1679999..."
