Consolidates code from other scripts. This notebook generates graphs representing the edges between all known leks at several distance thresholds; per-lek statistics are then computed from those graphs.

In [1]:
import pandas as pd
import igraph as ig
import numpy as np
from pyproj import Transformer
import geopandas as gpd
from shapely.geometry import Point, LineString
import geopandas as gpd
import graph_functions as graf

In [56]:
# Load the yearly lek activity table.
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/lek_data_binary_yearly_activity_w_tx.csv")

# Discard the per-year columns, then de-duplicate on the columns that remain
# so each distinct lek record appears only once.
df = df.drop(columns=['year', 'activity'])
unique_leks = df.drop_duplicates()

In [6]:
# Max dispersal: build the graph with a threshold of 75000 and save its adjacency matrix.
year_graph = graf.threshold_graph(unique_leks, 75000)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/max_dispersal.adjacency'
)
print('graph created for leks at max dispersal threshold distance')

graph created for leks at max dispersal threshold distance


In [5]:
# Average dispersal: build the graph with a threshold of 19000 and save its adjacency matrix.
year_graph = graf.threshold_graph(unique_leks, 19000)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/avg_dispersal.adjacency'
)
print('graph created for leks at average dispersal threshold distance')

graph created for leks at average dispersal threshold distance


In [4]:
# Nesting distance: build the graph with a threshold of 4800 and save its adjacency matrix.
year_graph = graf.threshold_graph(unique_leks, 4800)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_48.adjacency'
)
print('graph created for leks at nesting threshold distance')

graph created for leks at nesting threshold distance


In [57]:
# 1.5 km: build the graph with a threshold of 1500 and save its adjacency matrix.
year_graph = graf.threshold_graph(unique_leks, 1500)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_15.adjacency'
)
print('graph created for leks at 1.5 km threshold distance')

graph created for leks at 1.5 km threshold distance


In [58]:
# 2 km: build the graph with a threshold of 2000 and save its adjacency matrix.
year_graph = graf.threshold_graph(unique_leks, 2000)
year_graph.write_adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_2.adjacency'
)
print('graph created for leks at 2 km threshold distance')

graph created for leks at 2 km threshold distance


Reads in graphs and finds hub and centrality scores for each lek.

In [59]:
import igraph as ig
import pandas as pd

def all_lek_node_metrics_df(file_path, year_df):
    """Compute per-lek hub and betweenness scores from a saved adjacency graph.

    The graph is read from ``file_path`` with ``ig.Graph.Read_Adjacency`` and
    split into connected components. ``hub_score()`` and ``betweenness()`` are
    evaluated on each component's subgraph and the results are written back
    to the positions of the corresponding vertices in the full graph.

    Vertices are matched to leks purely by position: vertex ``i`` is reported
    against row ``i`` of ``year_df``. ``year_df`` must therefore list the leks
    in the same order that was used when the adjacency file was written.

    Parameters
    ----------
    file_path : str
        Path of the adjacency file to read.
    year_df : pandas.DataFrame
        Table with ``lek_id``, ``x_easting`` and ``y_northing`` columns, one
        row per graph vertex.

    Returns
    -------
    pandas.DataFrame
        Columns ``lek``, ``x_easting``, ``y_northing``, ``hub_score`` and
        ``centrality_score``; one row per vertex, in vertex order.

    Raises
    ------
    ValueError
        If the number of rows in ``year_df`` differs from the number of
        vertices in the graph.
    """
    # Pull the lek identifiers and coordinates first so that a missing column
    # is reported before the graph file is read.
    leks = year_df['lek_id'].tolist()
    x_easting = year_df['x_easting'].tolist()
    y_northing = year_df['y_northing'].tolist()

    graph = ig.Graph.Read_Adjacency(file_path)
    n_vertices = graph.vcount()

    # Rows and vertices are paired by position, so a size mismatch would make
    # every score meaningless. Fail before the expensive centrality pass
    # instead of surfacing later as an opaque DataFrame construction error.
    if len(leks) != n_vertices:
        raise ValueError(
            f"year_df has {len(leks)} rows but the graph in {file_path!r} has "
            f"{n_vertices} vertices; rows and vertices must match one-to-one"
        )

    # NaN marks any vertex that no component reports (none are expected).
    hub_scores = [float('nan')] * n_vertices
    centrality_scores = [float('nan')] * n_vertices

    # NOTE(review): scores are computed separately inside each component, so
    # any rescaling hub_score() applies is relative to the vertex's own
    # component - confirm this is intended when comparing leks across components.
    for component in graph.connected_components():
        subgraph = graph.subgraph(component)
        component_hubs = subgraph.hub_score()
        component_betweenness = subgraph.betweenness()

        # Position k in the subgraph results corresponds to component[k].
        for position, vertex_index in enumerate(component):
            hub_scores[vertex_index] = component_hubs[position]
            centrality_scores[vertex_index] = component_betweenness[position]

    return pd.DataFrame({
        'lek': leks,
        'x_easting': x_easting,
        'y_northing': y_northing,
        'hub_score': hub_scores,
        'centrality_score': centrality_scores,
    })

In [60]:
# Reload the yearly lek activity table.
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/lek_data_binary_yearly_activity_w_tx.csv")

# Discard the per-year columns, then de-duplicate on the columns that remain
# so each distinct lek record appears only once.
df = df.drop(columns=['year', 'activity'])
unique_leks = df.drop_duplicates()

# Only the last expression of a cell is displayed, so preview the final table.
unique_leks.head()

Unnamed: 0,lek_id,x_easting,y_northing
0,2,672527,3730500
48,4,669625,3729786
96,5,670774,3731230
144,6,673996,3727387
192,7,675163,3728666


In [61]:
# Score every lek at each distance threshold, then join the results into one table.

def _prefixed_scores(scores, prefix):
    """Round `scores` to 3 decimals and prefix its two score columns with `prefix`."""
    return scores.round(3).rename(columns={
        "hub_score": f"{prefix}_hub_score",
        "centrality_score": f"{prefix}_centrality_score",
    })


def _score_columns(scores, prefix):
    """Keep only the lek id and the two `prefix`-ed score columns of `scores`."""
    return scores[["lek", f"{prefix}_hub_score", f"{prefix}_centrality_score"]]


# Max dispersal distance graph
max_disperse_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/max_dispersal.adjacency', unique_leks)
max_disperse_df2 = _prefixed_scores(max_disperse_df, "max_dsp")

# Average dispersal distance graph
avg_disperse_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/avg_dispersal.adjacency', unique_leks)
avg_disperse_df2 = _prefixed_scores(avg_disperse_df, "avg_dsp")

# 4800 threshold graph (threshold_48), reported under the "hr" prefix
thresh48_all_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_48.adjacency', unique_leks)
thresh48_all_df2 = _prefixed_scores(thresh48_all_df, "hr")

# 1500 threshold graph (threshold_15), reported under the "k" prefix
thresh15_all_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_15.adjacency', unique_leks)
thresh15_all_df2 = _prefixed_scores(thresh15_all_df, "k")

# 2000 threshold graph (threshold_2), reported under the "k2" prefix
thresh2_all_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_2.adjacency', unique_leks)
thresh2_all_df2 = _prefixed_scores(thresh2_all_df, "k2")

# Only the max-dispersal frame keeps the coordinates; the other frames
# contribute just their two score columns, keyed by lek.
avg_merge = _score_columns(avg_disperse_df2, "avg_dsp")
hr_merge = _score_columns(thresh48_all_df2, "hr")
k_merge = _score_columns(thresh15_all_df2, "k")
k2_merge = _score_columns(thresh2_all_df2, "k2")

# Each right join keeps every lek present in the frame being added.
all_leks_merge = max_disperse_df2
for scores_to_add in (avg_merge, hr_merge, k_merge, k2_merge):
    all_leks_merge = all_leks_merge.merge(scores_to_add, how = 'right', on = 'lek')

# exporting csv
all_leks_merge.to_csv("E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/all_leks_scores.csv", index = False)

all_leks_merge.head()


Unnamed: 0,lek,x_easting,y_northing,max_dsp_hub_score,max_dsp_centrality_score,avg_dsp_hub_score,avg_dsp_centrality_score,hr_hub_score,hr_centrality_score,k_hub_score,k_centrality_score,k2_hub_score,k2_centrality_score
0,2,672527,3730500,0.943,167.491,0.636,140.83,0.562,4559.015,0.0,90.0,0.0,705.637
1,4,669625,3729786,0.956,184.784,0.762,561.526,0.479,21864.279,0.0,93.235,0.0,10.003
2,5,670774,3731230,0.953,179.758,0.685,266.422,0.276,20.672,0.0,371.736,0.0,263.417
3,6,673996,3727387,0.928,147.268,0.663,147.307,0.955,654.332,0.074,1106.272,0.259,4138.71
4,7,675163,3728666,0.919,139.995,0.604,85.953,0.772,3050.212,0.0,6.667,0.01,221.201


Stats on activity per lek and percent of years active and inactive

In [62]:
# Load the yearly activity table and rename the id column to 'lek' so it
# shares a key name with the graph-metric table.
data = "E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/lek_data_binary_yearly_activity_w_tx.csv"
df = pd.read_csv(data).rename(columns={'lek_id': 'lek'})
df.head()

Unnamed: 0,lek,year,x_easting,y_northing,activity
0,2,1971,672527,3730500,not surveyed
1,2,1972,672527,3730500,not surveyed
2,2,1973,672527,3730500,not surveyed
3,2,1974,672527,3730500,not surveyed
4,2,1975,672527,3730500,not surveyed


In [63]:
# Per-lek activity summary: counts and percentages of active / inactive /
# not-surveyed years, joined onto the graph metrics.

def _label_count(activity, label):
    """Number of yearly records in `activity` equal to `label`."""
    return (activity == label).sum()


def _surveyed_count(activity):
    """Number of yearly records in `activity` not marked 'not surveyed'."""
    return (activity != 'not surveyed').sum()


def _percent(part, whole):
    """Return part / whole * 100, or 0 when `whole` is not positive."""
    return part / whole * 100 if whole > 0 else 0


# Group by 'lek' and aggregate the yearly records of each lek
summary = df.groupby('lek').agg(
    y_northing=('y_northing', 'first'),  # Retain the first northings value for each lek
    x_easting=('x_easting', 'first'),    # Retain the first eastings value for each lek
    active_count=('activity', lambda x: _label_count(x, 'True')),
    inactive_count=('activity', lambda x: _label_count(x, 'False')),
    not_surveyed_count=('activity', lambda x: _label_count(x, 'not surveyed')),
    total_years_surveyed=('activity', lambda x: _surveyed_count(x)),
    # Share of surveyed years in which the lek was active / inactive.
    active_percent=('activity', lambda x: _percent(_label_count(x, 'True'), _surveyed_count(x))),
    inactive_percent=('activity', lambda x: _percent(_label_count(x, 'False'), _surveyed_count(x))),
    # NOTE(review): the denominators below leave out the numerator's own
    # category (active / (inactive + not surveyed), and vice versa), so these
    # values are ratios x 100 and can exceed 100 - confirm this is intended
    # rather than a percentage of all years.
    active_percent_non_surveyed_years=('activity', lambda x: _percent(
        _label_count(x, 'True'),
        _label_count(x, 'False') + _label_count(x, 'not surveyed'))),
    inactive_percent_non_surveyed_years=('activity', lambda x: _percent(
        _label_count(x, 'False'),
        _label_count(x, 'True') + _label_count(x, 'not surveyed'))),
).reset_index()

summary = summary.round(2)

# The summary already carries the coordinates, so drop them from the metric
# table before joining to avoid duplicated coordinate columns.
all_leks_merge_no_coord = all_leks_merge.drop(columns = ['y_northing', 'x_easting'])
df_stats = summary.merge(all_leks_merge_no_coord, how = 'right', on = 'lek')

# Save to CSV
df_stats.to_csv("E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/all_leks_scores_stats.csv", index=False)

df_stats.head()

Unnamed: 0,lek,y_northing,x_easting,active_count,inactive_count,not_surveyed_count,total_years_surveyed,active_percent,inactive_percent,active_percent_non_surveyed_years,...,max_dsp_hub_score,max_dsp_centrality_score,avg_dsp_hub_score,avg_dsp_centrality_score,hr_hub_score,hr_centrality_score,k_hub_score,k_centrality_score,k2_hub_score,k2_centrality_score
0,2,3730500,672527,4,0,44,4,100.0,0.0,9.09,...,0.943,167.491,0.636,140.83,0.562,4559.015,0.0,90.0,0.0,705.637
1,4,3729786,669625,4,0,44,4,100.0,0.0,9.09,...,0.956,184.784,0.762,561.526,0.479,21864.279,0.0,93.235,0.0,10.003
2,5,3731230,670774,4,0,44,4,100.0,0.0,9.09,...,0.953,179.758,0.685,266.422,0.276,20.672,0.0,371.736,0.0,263.417
3,6,3727387,673996,4,0,44,4,100.0,0.0,9.09,...,0.928,147.268,0.663,147.307,0.955,654.332,0.074,1106.272,0.259,4138.71
4,7,3728666,675163,4,0,44,4,100.0,0.0,9.09,...,0.919,139.995,0.604,85.953,0.772,3050.212,0.0,6.667,0.01,221.201


Exporting graph shapefiles

In [64]:
# Reload the yearly lek activity table.
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/lek_data_binary_yearly_activity_w_tx.csv")

# Discard the per-year columns, then de-duplicate on the columns that remain
# so each distinct lek record appears only once.
df = df.drop(columns=['year', 'activity'])
unique_leks = df.drop_duplicates()

# Only the last expression of a cell is displayed, so preview the final table.
unique_leks.head()

Unnamed: 0,lek_id,x_easting,y_northing
0,2,672527,3730500
48,4,669625,3729786
96,5,670774,3731230
144,6,673996,3727387
192,7,675163,3728666


In [66]:
# 1500 m threshold: reload the saved graph and export its edges as a shapefile.
test_graph = ig.Graph.Read_Adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_15.adjacency'
)
graf.edge_shp_export(
    test_graph,
    unique_leks,
    'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/all_leks_edges_15.shp',
)

edges exported to shapefile


In [67]:
# 2000 m threshold: reload the saved graph and export its edges as a shapefile.
test_graph = ig.Graph.Read_Adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_2.adjacency'
)
graf.edge_shp_export(
    test_graph,
    unique_leks,
    'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/all_leks_edges_2.shp',
)

edges exported to shapefile


In [68]:
# 4800 m threshold: reload the saved graph and export its edges as a shapefile.
test_graph = ig.Graph.Read_Adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/threshold_48.adjacency'
)
graf.edge_shp_export(
    test_graph,
    unique_leks,
    'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/all_leks_edges_hr.shp',
)

edges exported to shapefile


In [69]:
# Average dispersal graph: reload the saved graph and export its edges as a shapefile.
test_graph = ig.Graph.Read_Adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/avg_dispersal.adjacency'
)
graf.edge_shp_export(
    test_graph,
    unique_leks,
    'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/all_leks_edges_avg_dsp.shp',
)

edges exported to shapefile


In [70]:
# Max dispersal graph: reload the saved graph and export its edges as a shapefile.
test_graph = ig.Graph.Read_Adjacency(
    'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/max_dispersal.adjacency'
)
graf.edge_shp_export(
    test_graph,
    unique_leks,
    'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/all_leks_edges_max_dsp.shp',
)

edges exported to shapefile
