Consolidated code from other scripts. This notebook builds graphs of the edges between all known leks, one graph per threshold distance. Node statistics are then computed from those graphs.

In [4]:
import pandas as pd
import igraph as ig
import numpy as np
from pyproj import Transformer
import geopandas as gpd
from shapely.geometry import Point, LineString
import geopandas as gpd
import graph_functions as graf

In [3]:
# Load the yearly binary-activity table for the leks
df = pd.read_csv(
    "E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/unk_loc_leks_data/lek_data_binary_yearly_activity_unk_loc_leks.csv"
)

# Discard the per-year columns, then collapse the repeated rows that remain
# so each distinct lek record appears once
df = df.drop(columns=['year', 'activity'])
unique_leks = df.drop_duplicates()

In [9]:
# Maximum-dispersal scenario: 75000 is the distance threshold passed to the
# project helper (presumably metres - confirm in graph_functions).
year_graph = graf.imprv_threshold_graph(unique_leks, 75000)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/max_dispersal.adjacency'
)
print('graph created for leks at max dispersal threshold distance')

graph created for leks at max dispersal threshold distance


In [8]:
# Average-dispersal scenario: 19000 is the distance threshold passed to the
# project helper (presumably metres - confirm in graph_functions).
year_graph = graf.imprv_threshold_graph(unique_leks, 19000)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/avg_dispersal.adjacency'
)
print('graph created for leks at average dispersal threshold distance')

graph created for leks at average dispersal threshold distance


In [7]:
# Round-trip scenario: 13000 is the distance threshold passed to the
# project helper (presumably metres - confirm in graph_functions).
year_graph = graf.imprv_threshold_graph(unique_leks, 13000)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/rnd_trip.adjacency'
)
print('graph created for leks at round trip threshold distance')

graph created for leks at round trip threshold distance


In [6]:
# Nesting-distance scenario: 4800 is the distance threshold passed to the
# project helper (presumably metres - confirm in graph_functions). The
# output file is named threshold_48.
year_graph = graf.imprv_threshold_graph(unique_leks, 4800)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_48.adjacency'
)
print('graph created for leks at nesting threshold distance')

graph created for leks at nesting threshold distance


In [5]:
# 1.6 km scenario: a threshold of 1600 is passed to the project helper and
# the result is saved as threshold_16.
year_graph = graf.imprv_threshold_graph(unique_leks, 1600)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_16.adjacency'
)
print('graph created for leks at 1.6 km threshold distance')

graph created for leks at 1.6 km threshold distance


In [4]:
# 2 km scenario: a threshold of 2000 is passed to the project helper and
# the result is saved as threshold_2.
year_graph = graf.imprv_threshold_graph(unique_leks, 2000)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_2.adjacency'
)
print('graph created for leks at 2 km threshold distance')

graph created for leks at 2 km threshold distance


Read in the saved graphs and compute the hub score and betweenness centrality score for each lek.

In [6]:
import igraph as ig
import pandas as pd
import numpy as np

def all_lek_node_metrics_df(file_path, year_df):
    """Build a per-lek table of hub and betweenness scores from a saved graph.

    The graph is read from an adjacency file, split into its connected
    components, and scored one component at a time. The per-component
    scores are then written back in the vertex order of the full graph.

    Row ``i`` of ``year_df`` is paired with vertex ``i`` of the graph purely
    by position; nothing in this function matches leks to vertices by id.
    The row count is therefore checked against the vertex count before any
    scoring is done.

    NOTE(review): ``Read_Adjacency`` is called without a ``mode`` argument.
    If igraph's default builds a directed graph, betweenness counts each
    direction of a pair separately - confirm against the igraph docs.

    Parameters
    ----------
    file_path : str
        Path of the adjacency file passed to ``ig.Graph.Read_Adjacency``.
    year_df : pandas.DataFrame
        Must contain the columns ``lek_id``, ``x_easting`` and
        ``y_northing``, with one row per graph vertex in vertex order.

    Returns
    -------
    pandas.DataFrame
        Columns ``lek``, ``x_easting``, ``y_northing``, ``hub_score`` and
        ``centrality_score``, one row per vertex.

    Raises
    ------
    KeyError
        If ``year_df`` lacks one of the required columns.
    ValueError
        If the number of rows in ``year_df`` differs from the number of
        vertices in the graph.
    """
    # Extract lek ids and coordinates
    leks = year_df['lek_id'].tolist()
    x_easting = year_df['x_easting'].tolist()
    y_northing = year_df['y_northing'].tolist()

    # Create the graph from the adjacency file
    graph = ig.Graph.Read_Adjacency(file_path)

    # Fail fast: rows and vertices are matched by position, so a count
    # mismatch would otherwise only show up after the expensive scoring
    # below, as a generic pandas length error.
    n_vertices = graph.vcount()
    if len(leks) != n_vertices:
        raise ValueError(
            f"year_df has {len(leks)} rows but the graph read from "
            f"{file_path!r} has {n_vertices} vertices; rows and vertices "
            "are paired by position, so the counts must match"
        )

    # Start from NaN so any vertex that is never scored stays identifiable
    hub_scores = [float('nan')] * n_vertices
    centrality_scores = [float('nan')] * n_vertices

    # Score each connected component on its own subgraph
    for component in graph.connected_components():
        subgraph = graph.subgraph(component)
        component_hub = subgraph.hub_score()
        component_betweenness = subgraph.betweenness()

        # Map back to the original graph: position k in the subgraph
        # results is taken to belong to the k-th vertex id of the component
        for node_index, hub, betweenness in zip(
            component, component_hub, component_betweenness
        ):
            hub_scores[node_index] = hub
            centrality_scores[node_index] = betweenness

    # Build the final DataFrame
    return pd.DataFrame({
        'lek': leks,
        'x_easting': x_easting,
        'y_northing': y_northing,
        'hub_score': hub_scores,
        'centrality_score': centrality_scores,
    })

In [7]:
# Load the yearly binary-activity table for the leks
df = pd.read_csv(
    "E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/unk_loc_leks_data/lek_data_binary_yearly_activity_unk_loc_leks.csv"
)

# Discard the per-year columns, then collapse the repeated rows that remain
# so each distinct lek record appears once
df = df.drop(columns=['year', 'activity'])
unique_leks = df.drop_duplicates()

# Preview the de-duplicated leks (only the last expression in a cell is displayed)
unique_leks.head()

Unnamed: 0,lek_id,x_easting,y_northing
0,2,672527,3730500
48,4,669625,3729786
96,5,670774,3731230
144,6,673996,3727387
192,7,675163,3728666


In [8]:
# Score every lek at each threshold distance, give the score columns a
# distance-specific prefix, then join everything into one table keyed on 'lek'.

# Max dispersal distance
max_disperse_df = all_lek_node_metrics_df(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/max_dispersal.adjacency',
    unique_leks,
)
max_disperse_df2 = max_disperse_df.round(3).rename(
    columns={"hub_score": "max_dsp_hub_score",
             "centrality_score": "max_dsp_centrality_score"}
)

# Average dispersal distance
avg_disperse_df = all_lek_node_metrics_df(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/avg_dispersal.adjacency',
    unique_leks,
)
avg_disperse_df2 = avg_disperse_df.round(3).rename(
    columns={"hub_score": "avg_dsp_hub_score",
             "centrality_score": "avg_dsp_centrality_score"}
)

# Average round trip distance
rnd_trip_df = all_lek_node_metrics_df(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/rnd_trip.adjacency',
    unique_leks,
)
rnd_trip_df2 = rnd_trip_df.round(3).rename(
    columns={"hub_score": "rnd_trip_hub_score",
             "centrality_score": "rnd_trip_centrality_score"}
)

# Home range (threshold_48 graph)
thresh48_all_df = all_lek_node_metrics_df(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_48.adjacency',
    unique_leks,
)
thresh48_all_df2 = thresh48_all_df.round(3).rename(
    columns={"hub_score": "hr_hub_score",
             "centrality_score": "hr_centrality_score"}
)

# threshold_16 graph (note: the variables are named thresh15, the file is threshold_16)
thresh15_all_df = all_lek_node_metrics_df(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_16.adjacency',
    unique_leks,
)
thresh15_all_df2 = thresh15_all_df.round(3).rename(
    columns={"hub_score": "k_hub_score",
             "centrality_score": "k_centrality_score"}
)

# threshold_2 graph
thresh2_all_df = all_lek_node_metrics_df(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_2.adjacency',
    unique_leks,
)
thresh2_all_df2 = thresh2_all_df.round(3).rename(
    columns={"hub_score": "k2_hub_score",
             "centrality_score": "k2_centrality_score"}
)

# Keep only the key and the two score columns from every table except the
# max-dispersal one, which also supplies the coordinates
avg_merge = avg_disperse_df2[["lek", "avg_dsp_hub_score", "avg_dsp_centrality_score"]]
rnd_merge = rnd_trip_df2[["lek", "rnd_trip_hub_score", "rnd_trip_centrality_score"]]
hr_merge = thresh48_all_df2[["lek", "hr_hub_score", "hr_centrality_score"]]
k_merge = thresh15_all_df2[["lek", "k_hub_score", "k_centrality_score"]]
k2_merge = thresh2_all_df2[["lek", "k2_hub_score", "k2_centrality_score"]]

# Join the tables one after another on the lek id
all_leks_merge = (
    max_disperse_df2
    .merge(avg_merge, how='right', on='lek')
    .merge(rnd_merge, how='right', on='lek')
    .merge(hr_merge, how='right', on='lek')
    .merge(k_merge, how='right', on='lek')
    .merge(k2_merge, how='right', on='lek')
)

# Write the combined table to disk
all_leks_merge.to_csv(
    "E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/test/all_leks_scores1.csv",
    index=False,
)

all_leks_merge.head()


Unnamed: 0,lek,x_easting,y_northing,max_dsp_hub_score,max_dsp_centrality_score,avg_dsp_hub_score,avg_dsp_centrality_score,rnd_trip_hub_score,rnd_trip_centrality_score,hr_hub_score,hr_centrality_score,k_hub_score,k_centrality_score,k2_hub_score,k2_centrality_score
0,2,672527,3730500,0.945,205.105,0.631,191.884,0.711,489.789,0.561,3651.066,0.0,326.0,0.0,1027.52
1,4,669625,3729786,0.958,224.77,0.758,687.098,0.762,1958.549,0.476,21173.86,0.0,83.722,0.0,10.353
2,5,670774,3731230,0.955,219.368,0.684,372.106,0.685,954.002,0.276,265.889,0.0,301.674,0.0,334.802
3,6,673996,3727387,0.931,182.535,0.657,212.46,0.82,258.645,0.952,931.25,0.085,919.897,0.259,9002.776
4,7,675163,3728666,0.922,173.611,0.601,135.845,0.762,620.6,0.77,3350.399,0.002,527.547,0.01,466.184
