Notebook for computing the metrics of each graph built at the different distance thresholds.

In [1]:
import os
import re
import pandas as pd
import igraph as ig
import numpy as np
from pyproj import Transformer
import geopandas as gpd
from shapely.geometry import Point, LineString
import geopandas as gpd
import graph_functions as graf

Some function examples

In [None]:
# Quick tour of the igraph calls used later in this notebook, run on `test_graph`.
# Each entry pairs the printed label with the callable that produces the value,
# so the values are still computed one at a time, right before they are printed.
example_metrics = (
    ('density', test_graph.density),
    ('average path length', test_graph.average_path_length),
    ('diameter', test_graph.diameter),
    ('transitivity', test_graph.transitivity_undirected),
    # TODO: will need to attach to the lek UTMs
    ("kleinberg's hub score", lambda: max(test_graph.hub_score())),
    # TODO: will need to attach to the lek UTMs
    ('cutpoints', test_graph.articulation_points),
)
for metric_label, compute_metric in example_metrics:
    print(f'{metric_label} : {compute_metric()}')

Function testing

In [None]:
# Scratch check of the cluster statistics on the coalescence graphs.
# The file-name parsing that used to sit in this cell was disabled (it lived in
# a string literal that was evaluated and thrown away on every iteration); the
# working version of that parsing is in global_metrics_df.
given_directory = 'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/coalescence'
# These three names are not filled in by this cell; they stay defined so that
# anything else in the notebook that refers to them keeps running.
years_ex_list = []
type_list = []
types_list = ['average', 'dispersal', 'coalescence', 'round', 'trip', '48', 'max']
mean_clust_info = []  # mean cluster size, one entry per adjacency file
num_clust_info = []   # number of clusters, one entry per adjacency file
adjacency_files = [f for f in os.listdir(given_directory) if f.endswith('.adjacency')]

for adj_file in adjacency_files:
    file_path = f'{given_directory}/{adj_file}'

    # Create the graph from the file
    graph = ig.Graph.Read_Adjacency(file_path)

    # Connected components of the graph ("clusters")
    clusters = graph.connected_components()
    num_clust_info.append(len(clusters))

    cluster_sizes = clusters.sizes()
    print(cluster_sizes)

    # A graph without vertices has no clusters; record NaN instead of
    # failing with ZeroDivisionError.
    if cluster_sizes:
        mean_cluster_size = sum(cluster_sizes) / len(cluster_sizes)
    else:
        mean_cluster_size = float('nan')
    mean_clust_info.append(mean_cluster_size)

Global metrics

In [None]:
def global_metrics_df(given_directory):
    """Compute whole-graph metrics for every ``.adjacency`` file in a directory.

    The part of each file name before the first ``.`` is split on ``_``. The
    first token is stored as the year (kept as a string), and every token that
    appears in ``types_list`` is joined with ``_`` to form the graph type.

    Parameters
    ----------
    given_directory : str
        Directory holding the adjacency-matrix files.

    Returns
    -------
    pandas.DataFrame
        One row per ``.adjacency`` file (files are processed in sorted name
        order) with the columns ``graph_type``, ``year``, ``density``,
        ``avg_path_length``, ``diameter``, ``transitivity``, ``clusters``,
        ``mean_cluster_size`` and ``max_cluster_size``. ``graph_type`` is an
        empty string when the file name contains none of the known keywords.
        For a graph with no clusters ``mean_cluster_size`` is NaN and
        ``max_cluster_size`` is 0.
    """
    # graph_type - already have a list of keywords to look for
    types_list = ['average', 'dispersal', 'disperse', 'coalescence', 'round', 'trip', '48', 'max']
    columns = ['graph_type', 'year', 'density', 'avg_path_length', 'diameter',
               'transitivity', 'clusters', 'mean_cluster_size', 'max_cluster_size']

    # One dict per file keeps every column the same length by construction.
    records = []

    # os.listdir order is unspecified; sort so the row order is reproducible.
    adjacency_files = sorted(f for f in os.listdir(given_directory) if f.endswith('.adjacency'))

    for adj_file in adjacency_files:
        file_path = f'{given_directory}/{adj_file}'
        name_tokens = adj_file.split('.')[0].split('_')

        # Always produce a graph type (possibly ''), so a file without a
        # recognised keyword cannot leave this column shorter than the others.
        found_types = [token for token in name_tokens if token in types_list]

        # Create the graph from the file
        graph = ig.Graph.Read_Adjacency(file_path)

        density = graph.density()
        avg_path_length = graph.average_path_length()
        diameter = graph.diameter()
        transitivity = graph.transitivity_undirected()

        # Cluster statistics from the connected components
        clusters = graph.connected_components()
        cluster_sizes = clusters.sizes()
        if cluster_sizes:
            mean_cluster_size = sum(cluster_sizes) / len(cluster_sizes)
            max_cluster_size = max(cluster_sizes)
        else:
            # Empty graph: avoid ZeroDivisionError / max() of an empty list.
            mean_cluster_size = float('nan')
            max_cluster_size = 0

        records.append({
            'graph_type': '_'.join(found_types),
            'year': name_tokens[0],
            'density': density,
            'avg_path_length': avg_path_length,
            'diameter': diameter,
            'transitivity': transitivity,
            'clusters': len(clusters),
            'mean_cluster_size': mean_cluster_size,
            'max_cluster_size': max_cluster_size,
        })

    return pd.DataFrame(records, columns=columns)

In [None]:
# Build the global-metrics table for each graph family, then stack them into one CSV.
graphs_root = 'E:/!!Research/!!!Data/graph_analysis/igraph_graphs'
metrics_root = 'E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data'

average_dispersal_df = global_metrics_df(f'{graphs_root}/average_dispersal')
round_trip_df = global_metrics_df(f'{graphs_root}/round_trip')
thresh48_df = global_metrics_df(f'{graphs_root}/threshold_4_8')
max_disperse_df = global_metrics_df(f'{graphs_root}/threshold_max_disperse')
coalescence_df = global_metrics_df(f'{graphs_root}/coalescence')

frames = [
    average_dispersal_df,
    round_trip_df,
    thresh48_df,
    max_disperse_df,
    coalescence_df,
]
# Each frame keeps its own row index; the index is written out as the first CSV column.
frame_concat = pd.concat(frames)
frame_concat.to_csv(f'{metrics_root}/lek_rolling_binary_global_metrics.csv')

Per node metrics

In [77]:
def node_metrics_df(given_directory, given_df):
    """Compute per-node (per-lek) metrics for every ``.adjacency`` file in a directory.

    The part of each file name before the first ``.`` is split on ``_``. The
    first token is converted to ``int`` and used to select the rows of
    ``given_df`` for that year, and every token found in ``types_list`` is
    joined with ``_`` to form the graph type.

    The i-th selected row of ``given_df`` is paired with the i-th vertex score
    of the graph. NOTE(review): this assumes the graph was built from the same
    rows in the same order -- confirm against the code that wrote the
    adjacency files.

    Parameters
    ----------
    given_directory : str
        Directory holding the adjacency-matrix files.
    given_df : pandas.DataFrame
        Lek table with the columns ``year``, ``lek_id``, ``x_easting`` and
        ``y_northing``.

    Returns
    -------
    pandas.DataFrame
        One row per lek and file (files are processed in sorted name order)
        with the columns ``lek``, ``x_easting``, ``y_northing``,
        ``graph_type``, ``year``, ``hub_score`` and ``centrality_score``.
        ``graph_type`` is an empty string when the file name contains none of
        the known keywords.

    Raises
    ------
    ValueError
        If a graph does not have exactly one vertex score per lek row of its
        year, or if the leading file-name token is not an integer.
    """
    # graph_type - already have a list of keywords to look for
    types_list = ['average', 'dispersal', 'disperse', 'coalescence', 'round', 'trip', '48', 'max']

    leks = []        # lek id
    x_easting = []   # lek coordinates
    y_northing = []
    graph_type = []
    years = []
    hub_scores = []
    centrality_scores = []

    # os.listdir order is unspecified; sort so the row order is reproducible.
    adjacency_files = sorted(f for f in os.listdir(given_directory) if f.endswith('.adjacency'))

    for adj_file in adjacency_files:
        file_path = f'{given_directory}/{adj_file}'
        name_tokens = adj_file.split('.')[0].split('_')

        year = name_tokens[0]
        year_df = given_df[given_df['year'] == int(year)]

        # Always produce a graph type (possibly ''), so a file without a
        # recognised keyword cannot leave this column shorter than the others.
        type_string = '_'.join(token for token in name_tokens if token in types_list)

        # Create the graph from the file
        graph = ig.Graph.Read_Adjacency(file_path)

        # hubs from Kleinberg's hub scores
        file_hub_scores = graph.hub_score()
        # stepping stones from betweenness centrality
        file_centrality_scores = graph.betweenness()

        # Fail here, naming the file, instead of letting the columns drift out
        # of step (mismatches in different files could otherwise cancel out
        # and silently attach scores to the wrong leks).
        if len(file_hub_scores) != len(year_df) or len(file_centrality_scores) != len(year_df):
            raise ValueError(
                f'{adj_file}: graph has {len(file_hub_scores)} vertices but '
                f'{len(year_df)} lek rows were found for year {year}'
            )

        leks.extend(year_df['lek_id'].tolist())
        x_easting.extend(year_df['x_easting'].tolist())
        y_northing.extend(year_df['y_northing'].tolist())
        graph_type.extend([type_string] * len(year_df))
        years.extend(year_df['year'].tolist())
        hub_scores.extend(file_hub_scores)
        centrality_scores.extend(file_centrality_scores)

    data = {'lek': leks,
            'x_easting': x_easting,
            'y_northing': y_northing,
            'graph_type': graph_type,
            'year': years,
            'hub_score': hub_scores,
            'centrality_score': centrality_scores
            }
    return pd.DataFrame(data)

In [78]:
# Load the lek activity table.
data_root = 'E:/!!Research/!!!Data/graph_analysis'
df = pd.read_csv(f'{data_root}/lek_data/lek_data_binary_rolling_activity.csv')
# The previous export wrote its row index as an unnamed first column; discard it.
df = df.drop(columns='Unnamed: 0')
# Keep only the leks flagged as active.
# NOTE(review): this compares against the string 'TRUE'; if pandas parses the
# column as booleans the filter would select nothing -- confirm the dtype.
df_active = df.loc[df['active_last_5_years'] == 'TRUE']

# One node-metrics table per graph family.
graphs_root = f'{data_root}/igraph_graphs'
average_dispersal_df = node_metrics_df(f'{graphs_root}/average_dispersal', df_active)
round_trip_df = node_metrics_df(f'{graphs_root}/round_trip', df_active)
thresh48_df = node_metrics_df(f'{graphs_root}/threshold_4_8', df_active)
max_disperse_df = node_metrics_df(f'{graphs_root}/threshold_max_disperse', df_active)
coalescence_df = node_metrics_df(f'{graphs_root}/coalescence', df_active)

# Stack the tables and write the final CSV.
frames = [
    average_dispersal_df,
    round_trip_df,
    thresh48_df,
    max_disperse_df,
    coalescence_df,
]
frame_concat = pd.concat(frames)
frame_concat.to_csv(f'{data_root}/igraph_metrics_data/lek_rolling_binary_node_metrics.csv')


Testing function

In [None]:
def node_metrics_df(given_directory, given_df):
    """Diagnostic run of the per-node metrics collection: print column lengths only.

    Walks the ``.adjacency`` files in ``given_directory`` and gathers the lek
    id, coordinates, graph type, year, hub score and betweenness lists, then
    prints the length of each list instead of building a DataFrame. Equal
    lengths mean the lists could be combined into one table. A shorter
    graph-type length means at least one file name contained none of the
    keywords in ``types_list``.

    NOTE(review): this shares its name with the DataFrame-returning
    ``node_metrics_df`` defined earlier in the notebook, so running this cell
    replaces that definition in the kernel.

    Parameters
    ----------
    given_directory : str
        Directory holding the adjacency-matrix files.
    given_df : pandas.DataFrame
        Lek table with the columns ``year``, ``lek_id``, ``x_easting`` and
        ``y_northing``.

    Returns
    -------
    None
        The seven lengths are printed, one per line, in the order: leks,
        x_easting, y_northing, graph_type, years, hub_scores,
        centrality_scores.
    """
    # graph_type - already have a list of keywords to look for
    types_list = ['average', 'dispersal', 'disperse', 'coalescence', 'round', 'trip', '48', 'max']

    leks = []        # lek id
    x_easting = []   # lek coordinates
    y_northing = []
    graph_type = []
    years = []
    hub_scores = []
    centrality_scores = []

    adjacency_files = [f for f in os.listdir(given_directory) if f.endswith('.adjacency')]

    for adj_file in adjacency_files:
        file_path = f'{given_directory}/{adj_file}'
        name_tokens = adj_file.split('.')[0].split('_')

        year = name_tokens[0]
        year_df = given_df[given_df['year'] == int(year)]

        # Deliberately left conditional: a missing keyword then shows up as a
        # shorter graph_type length in the printout.
        found_types = [token for token in name_tokens if token in types_list]
        if found_types:
            graph_type.extend(['_'.join(found_types)] * len(year_df))

        # Store the year values themselves. The previous loop appended the
        # whole per-file list once per row, producing a list of lists.
        years.extend(year_df['year'].tolist())
        leks.extend(year_df['lek_id'].tolist())
        x_easting.extend(year_df['x_easting'].tolist())
        y_northing.extend(year_df['y_northing'].tolist())

        # Create the graph from the file
        graph = ig.Graph.Read_Adjacency(file_path)

        # hubs from Kleinberg's hub scores
        hub_scores.extend(graph.hub_score())

        # stepping stones from betweenness centrality
        centrality_scores.extend(graph.betweenness())

    for collected in (leks, x_easting, y_northing, graph_type, years,
                      hub_scores, centrality_scores):
        print(len(collected))

In [69]:
# Load the lek activity table.
data_root = 'E:/!!Research/!!!Data/graph_analysis'
df = pd.read_csv(f'{data_root}/lek_data/lek_data_binary_rolling_activity.csv')
# The previous export wrote its row index as an unnamed first column; discard it.
df = df.drop(columns='Unnamed: 0')
# Keep only the leks flagged as active.
df_active = df.loc[df['active_last_5_years'] == 'TRUE']

# Run the function on the small test folder.
test_folder = f'{data_root}/igraph_graphs/test_folder'
average_dispersal_df = node_metrics_df(test_folder, df_active)

21
[4624, 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642, 4643, 4644]
21
21
21
21
21
21
21


Per cluster metrics