Consolidate code from other scripts. This script is used to generate graphs representing the edges between all known leks. Stats are then generated from this.

In [1]:
import pandas as pd
import igraph as ig
import numpy as np
from pyproj import Transformer
import geopandas as gpd
from shapely.geometry import Point, LineString
import geopandas as gpd
import graph_functions as graf

In [3]:
# importing the data
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/unk_loc_leks_data/lek_data_binary_yearly_activity_unk_loc_leks.csv")

# dropping the column from the last time was exported
df.drop(['year', 'activity'], axis = 1, inplace = True)
unique_leks = df.drop_duplicates()

In [9]:
year_graph = graf.imprv_threshold_graph(unique_leks, 75000)
year_graph.write_adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/max_dispersal.adjacency')
print(f'graph created for leks at max dispersal threshold distance')

graph created for leks at max dispersal threshold distance


In [8]:
year_graph = graf.imprv_threshold_graph(unique_leks, 19000)
year_graph.write_adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/avg_dispersal.adjacency')
print(f'graph created for leks at average dispersal threshold distance')

graph created for leks at average dispersal threshold distance


In [7]:
year_graph = graf.imprv_threshold_graph(unique_leks, 13000)
year_graph.write_adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/rnd_trip.adjacency')
print(f'graph created for leks at round trip threshold distance')

graph created for leks at round trip threshold distance


In [6]:
year_graph = graf.imprv_threshold_graph(unique_leks, 4800)
year_graph.write_adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_48.adjacency')
print(f'graph created for leks at nesting threshold distance')

graph created for leks at nesting threshold distance


In [5]:
year_graph = graf.imprv_threshold_graph(unique_leks, 1500)
year_graph.write_adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_15.adjacency')
print(f'graph created for leks at 1.5 km threshold distance')

graph created for leks at 1.5 km threshold distance


In [4]:
year_graph = graf.imprv_threshold_graph(unique_leks, 2000)
year_graph.write_adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_2.adjacency')
print(f'graph created for leks at 2 km threshold distance')

graph created for leks at 2 km threshold distance


Reads in graphs and finds hub and centrality scores for each lek.

In [10]:
import igraph as ig
import pandas as pd
import numpy as np

def all_lek_node_metrics_df(file_path, year_df):
    leks = []  # lek id
    x_easting = []  # lek coordinates
    y_northing = []
    hub_scores = []
    centrality_scores = []
    is_hub = []
    is_step_stone = []

    # Extract leks and coordinates
    leks.extend(year_df['lek_id'].tolist())
    x_easting.extend(year_df['x_easting'].tolist())
    y_northing.extend(year_df['y_northing'].tolist())

    # Create the graph from the adjacency file
    graph = ig.Graph.Read_Adjacency(file_path)

    # Identify connected components
    components = graph.connected_components()

    # Initialize scores with NaN to identify nodes in disconnected components not processed
    hub_score_map = {v.index: float('nan') for v in graph.vs}
    centrality_map = {v.index: float('nan') for v in graph.vs}

    # Same process but getting ready to indicate if hub or not
    is_hub_map = {v.index: float('nan') for v in graph.vs}
    is_step_stone_map = {v.index: float('nan') for v in graph.vs}

    # Process each component separately
    for component in components:
        subgraph = graph.subgraph(component)

        # Compute hub scores and betweenness centrality for the subgraph
        subgraph_hub_scores = subgraph.hub_score()
        subgraph_centrality_scores = subgraph.betweenness()

        subgraph_is_hub = []
        subgraph_is_step_stone = []

        if len(subgraph_hub_scores) > 2:
            subgraph_is_hub_temp = (subgraph_hub_scores >= np.quantile(subgraph_hub_scores, 0.9)).tolist()
            subgraph_is_step_stone_temp = (subgraph_centrality_scores >= np.quantile(subgraph_centrality_scores, 0.9)).tolist()
            subgraph_is_hub.extend(subgraph_is_hub_temp)
            subgraph_is_step_stone.extend(subgraph_is_step_stone_temp)
        else:
            subgraph_is_hub_temp = [False] * len(subgraph_hub_scores)
            subgraph_is_step_stone_temp = [False] * len(subgraph_centrality_scores)
            subgraph_is_hub.extend(subgraph_is_hub_temp)
            subgraph_is_step_stone.extend(subgraph_is_step_stone_temp)

        # Map back to the original graph
        for i, node_index in enumerate(component):
            hub_score_map[node_index] = subgraph_hub_scores[i]
            centrality_map[node_index] = subgraph_centrality_scores[i]

            is_hub_map[node_index] = subgraph_is_hub[i]
            is_step_stone_map[node_index] = subgraph_is_step_stone[i]

    # Retrieve scores for each lek in the original graph order
    hub_scores = [hub_score_map[i] for i in range(len(graph.vs))]
    centrality_scores = [centrality_map[i] for i in range(len(graph.vs))]

    hub_temp = [is_hub_map[i] for i in range(len(graph.vs))]

    step_stone_temp = [is_step_stone_map[i] for i in range(len(graph.vs))]

    is_hub.extend(hub_temp)

    is_step_stone.extend(step_stone_temp)


    # Build the final DataFrame
    data = {
        'lek': leks,
        'x_easting': x_easting,
        'y_northing': y_northing,
        'hub_score': hub_scores,
        'centrality_score': centrality_scores,
        'is_hub' : is_hub,
        'is_step_stone' : is_step_stone
    }
    df = pd.DataFrame(data)
    return df

In [12]:
# importing the data
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/unk_loc_leks_data/lek_data_binary_yearly_activity_unk_loc_leks.csv")
df.head()

# dropping the column from the last time was exported
df.drop(['year', 'activity'], axis = 1, inplace = True)
unique_leks = df.drop_duplicates()
unique_leks.head()

Unnamed: 0,lek_id,x_easting,y_northing
0,2,672527,3730500
48,4,669625,3729786
96,5,670774,3731230
144,6,673996,3727387
192,7,675163,3728666


In [13]:
# Add the metrics to each of the leks for each distance, then merges them all

# For max dispersal
max_disperse_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/max_dispersal.adjacency', unique_leks)
max_disperse_df2 = max_disperse_df.round(3)
max_disperse_df2 = max_disperse_df2.rename(columns = {"hub_score" : "max_dsp_hub_score", 
                                                      "centrality_score" : "max_dsp_centrality_score",
                                                      "is_hub" : "max_dsp_is_hub",
                                                      "is_step_stone" : "max_dsp_is_step_stone"})

# For average dispersal distance
avg_disperse_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/avg_dispersal.adjacency', unique_leks)
avg_disperse_df2 = avg_disperse_df.round(3)
avg_disperse_df2 = avg_disperse_df2.rename(columns = {"hub_score" : "avg_dsp_hub_score", 
                                                      "centrality_score" : "avg_dsp_centrality_score",
                                                      "is_hub" : "avg_dsp_is_hub",
                                                      "is_step_stone" : "avg_dsp_is_step_stone"})

# For average round trip distance
rnd_trip_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/rnd_trip.adjacency', unique_leks)
rnd_trip_df2 = rnd_trip_df.round(3)
rnd_trip_df2 = rnd_trip_df2.rename(columns = {"hub_score" : "rnd_trip_hub_score", 
                                              "centrality_score" : "rnd_trip_centrality_score",
                                              "is_hub" : "rnd_trip_is_hub",
                                                "is_step_stone" : "rnd_trip_is_step_stone"})

# For home range
thresh48_all_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_48.adjacency', unique_leks)
thresh48_all_df2 = thresh48_all_df.round(3)
thresh48_all_df2 = thresh48_all_df2.rename(columns = {"hub_score" : "hr_hub_score", 
                                                      "centrality_score" : "hr_centrality_score",
                                                      "is_hub" : "hr_is_hub",
                                                      "is_step_stone" : "hr_is_step_stone"})

thresh15_all_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_15.adjacency', unique_leks)
thresh15_all_df2 = thresh15_all_df.round(3)
thresh15_all_df2 = thresh15_all_df2.rename(columns = {"hub_score" : "k_hub_score", 
                                                      "centrality_score" : "k_centrality_score",
                                                      "is_hub" : "k_is_hub",
                                                      "is_step_stone" : "k_is_step_stone"})

thresh2_all_df = all_lek_node_metrics_df('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_2.adjacency', unique_leks)
thresh2_all_df2 = thresh2_all_df.round(3)
thresh2_all_df2 = thresh2_all_df2.rename(columns = {"hub_score" : "k2_hub_score", 
                                                    "centrality_score" : "k2_centrality_score",
                                                    "is_hub" : "k2_is_hub",
                                                    "is_step_stone" : "k2_is_step_stone"})

# merging all
avg_merge = avg_disperse_df2[["lek", "avg_dsp_hub_score", "avg_dsp_centrality_score", "avg_dsp_is_hub", "avg_dsp_is_step_stone"]]
rnd_merge = rnd_trip_df2[["lek", "rnd_trip_hub_score", "rnd_trip_centrality_score", "rnd_trip_is_hub", "rnd_trip_is_step_stone"]]
hr_merge = thresh48_all_df2[["lek", "hr_hub_score", "hr_centrality_score", "hr_is_hub", "hr_is_step_stone"]]
k_merge = thresh15_all_df2[["lek", "k_hub_score", "k_centrality_score", "k_is_hub", "k_is_step_stone"]]
k2_merge = thresh2_all_df2[["lek", "k2_hub_score", "k2_centrality_score", "k2_is_hub", "k2_is_step_stone"]]
all_leks_merge = max_disperse_df2.merge(avg_merge[['lek', 'avg_dsp_hub_score', 'avg_dsp_centrality_score', 
                                                   "avg_dsp_is_hub", "avg_dsp_is_step_stone"]], how = 'right', on = 'lek')
all_leks_merge = all_leks_merge.merge(rnd_merge[['lek', 'rnd_trip_hub_score', 'rnd_trip_centrality_score', 
                                                 "rnd_trip_is_hub", "rnd_trip_is_step_stone"]], how = 'right', on = 'lek')
all_leks_merge = all_leks_merge.merge(hr_merge[['lek', 'hr_hub_score', 'hr_centrality_score', 
                                                "hr_is_hub", "hr_is_step_stone"]], how = 'right', on = 'lek')
all_leks_merge = all_leks_merge.merge(k_merge[['lek', 'k_hub_score', 'k_centrality_score', 
                                               "k_is_hub", "k_is_step_stone"]], how = 'right', on = 'lek')
all_leks_merge = all_leks_merge.merge(k2_merge[['lek', 'k2_hub_score', 'k2_centrality_score', 
                                                "k2_is_hub", "k2_is_step_stone"]], how = 'right', on = 'lek')

# exporting csv
all_leks_merge.to_csv("E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/unk_loc_leks/all_leks_scores.csv", index = False)

all_leks_merge.head()


Unnamed: 0,lek,x_easting,y_northing,max_dsp_hub_score,max_dsp_centrality_score,max_dsp_is_hub,max_dsp_is_step_stone,avg_dsp_hub_score,avg_dsp_centrality_score,avg_dsp_is_hub,...,hr_is_hub,hr_is_step_stone,k_hub_score,k_centrality_score,k_is_hub,k_is_step_stone,k2_hub_score,k2_centrality_score,k2_is_hub,k2_is_step_stone
0,2,672527,3730500,0.945,205.105,False,False,0.631,191.884,False,...,True,False,0.0,92.0,False,False,0.0,1027.52,False,False
1,4,669625,3729786,0.958,224.77,False,False,0.758,687.098,False,...,True,False,0.0,94.911,False,False,0.0,10.353,False,False
2,5,670774,3731230,0.955,219.368,False,False,0.684,372.106,False,...,True,False,0.0,379.546,False,False,0.0,334.802,False,False
3,6,673996,3727387,0.931,182.535,False,False,0.657,212.46,False,...,True,False,0.074,1351.856,False,True,0.259,9002.776,False,False
4,7,675163,3728666,0.922,173.611,False,False,0.601,135.845,False,...,True,False,0.0,10.0,False,False,0.01,466.184,False,False


Stats on activity per lek and percent of years active and inactive

In [14]:
data = "E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/unk_loc_leks_data/lek_data_binary_yearly_activity_unk_loc_leks.csv"
df = pd.read_csv(data)
df = df.rename(columns = {'lek_id' : 'lek'})
df.head()

Unnamed: 0,lek,year,x_easting,y_northing,activity
0,2,1971,672527,3730500,not surveyed
1,2,1972,672527,3730500,not surveyed
2,2,1973,672527,3730500,not surveyed
3,2,1974,672527,3730500,not surveyed
4,2,1975,672527,3730500,not surveyed


In [15]:
# Group by 'lek_id' and aggregate data for each lek
summary = df.groupby('lek').agg(
    y_northing=('y_northing', 'first'),  # Retain the first northings value for each lek
    x_easting=('x_easting', 'first'),    # Retain the first eastings value for each lek
    active_count=('activity', lambda x: (x == 'True').sum()),
    inactive_count=('activity', lambda x: (x == 'False').sum()),
    not_surveyed_count=('activity', lambda x: (x == 'not surveyed').sum()),
    total_years_surveyed=('activity', lambda x: (x != 'not surveyed').sum()),
    #years_between_surveys=('year', lambda x: (x.diff().dropna().mean() if len(x.dropna()) > 1 else None)),
    active_percent=('activity', lambda x: (x == 'True').sum() / (x != 'not surveyed').sum() * 100 if (x != 'not surveyed').sum() > 0 else 0),
    inactive_percent=('activity', lambda x: (x == 'False').sum() / (x != 'not surveyed').sum() * 100 if (x != 'not surveyed').sum() > 0 else 0),
    active_percent_non_surveyed_years=('activity', lambda x: (x == 'True').sum() / ((x == 'False').sum() + (x == 'not surveyed').sum()) * 100 if ((x == 'False').sum() + (x == 'not surveyed').sum()) > 0 else 0),
    inactive_percent_non_surveyed_years=('activity', lambda x: (x == 'False').sum() / ((x == 'True').sum() + (x == 'not surveyed').sum()) * 100 if ((x == 'True').sum() + (x == 'not surveyed').sum()) > 0 else 0)
).reset_index()

summary = summary.round(2)

all_leks_merge_no_coord = all_leks_merge.drop(columns = {'y_northing', 'x_easting'})
df_stats = summary.merge(all_leks_merge_no_coord, how = 'right', on = 'lek')

# Save to CSV
df_stats.to_csv("E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/unk_loc_leks/all_leks_scores_stats_unk_loc_leks.csv", index=False)

df_stats.head()

Unnamed: 0,lek,y_northing,x_easting,active_count,inactive_count,not_surveyed_count,total_years_surveyed,active_percent,inactive_percent,active_percent_non_surveyed_years,...,hr_is_hub,hr_is_step_stone,k_hub_score,k_centrality_score,k_is_hub,k_is_step_stone,k2_hub_score,k2_centrality_score,k2_is_hub,k2_is_step_stone
0,2,3730500,672527,4,0,44,4,100.0,0.0,9.09,...,True,False,0.0,92.0,False,False,0.0,1027.52,False,False
1,4,3729786,669625,4,0,44,4,100.0,0.0,9.09,...,True,False,0.0,94.911,False,False,0.0,10.353,False,False
2,5,3731230,670774,4,0,44,4,100.0,0.0,9.09,...,True,False,0.0,379.546,False,False,0.0,334.802,False,False
3,6,3727387,673996,4,0,44,4,100.0,0.0,9.09,...,True,False,0.074,1351.856,False,True,0.259,9002.776,False,False
4,7,3728666,675163,4,0,44,4,100.0,0.0,9.09,...,True,False,0.0,10.0,False,False,0.01,466.184,False,False


Exporting graph shapefiles

In [16]:
# importing the data
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/unk_loc_leks_data/lek_data_binary_yearly_activity_unk_loc_leks.csv")
df.head()

# dropping the column from the last time was exported
df.drop(['year', 'activity'], axis = 1, inplace = True)
unique_leks = df.drop_duplicates()
unique_leks.head()

Unnamed: 0,lek_id,x_easting,y_northing
0,2,672527,3730500
48,4,669625,3729786
96,5,670774,3731230
144,6,673996,3727387
192,7,675163,3728666


In [None]:
# 1500 m
test_graph = ig.Graph.Read_Adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_15.adjacency')
#print(test_graph)
graf.edge_shp_export(test_graph, unique_leks, 'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/unk_loc_leks/all_leks_edges_15.shp')

In [67]:
# 2000 m
test_graph = ig.Graph.Read_Adjacency(f'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_2.adjacency')
#print(test_graph)
graf.edge_shp_export(test_graph, unique_leks, 'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/unk_loc_leks/all_leks_edges_2.shp')

edges exported to shapefile


In [68]:
# 4800 m
test_graph = ig.Graph.Read_Adjacency('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/threshold_48.adjacency')
#print(test_graph)
graf.edge_shp_export(test_graph, unique_leks, 'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/unk_loc_leks/all_leks_edges_hr.shp')

edges exported to shapefile


In [22]:
# round trip
test_graph = ig.Graph.Read_Adjacency('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/rnd_trip.adjacency')
graf.edge_shp_export(test_graph, unique_leks, 'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/unk_loc_leks/all_leks_edges_rnd_trip.shp')

edges exported to shapefile


In [69]:
# avg_dsp
test_graph = ig.Graph.Read_Adjacency('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/avg_dispersal.adjacency')
#print(test_graph)
graf.edge_shp_export(test_graph, unique_leks, 'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/unk_loc_leks/all_leks_edges_avg_dsp.shp')

edges exported to shapefile


In [70]:
# max_dsp
test_graph = ig.Graph.Read_Adjacency('E:/!!Research/!!!Data/graph_analysis/igraph_graphs/full_graphs/unk_loc_leks/max_dispersal.adjacency')
#print(test_graph)
graf.edge_shp_export(test_graph, unique_leks, 'E:/!!Research/!!!Data/graph_analysis/spatial_data/all_leks/unk_loc_leks/all_leks_edges_max_dsp.shp')

edges exported to shapefile


Graphs for each of the active leks each year.

In [1]:
import pandas as pd
import igraph as ig
import numpy as np
from pyproj import Transformer
import geopandas as gpd
from shapely.geometry import Point, LineString
import geopandas as gpd
import graph_functions as graf

In [8]:
# importing the data
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/unk_loc_leks_data/lek_data_binary_yearly_activity_unk_loc_leks.csv")
df = df.sort_values(by='year')
df_active = df[df['activity'] == 'True']
unique_years = df_active['year'].unique()
df_active.head()

Unnamed: 0,lek_id,year,x_easting,y_northing,activity
3264,4627,1971,612215,3712342,True
3216,4626,1971,610855,3710424,True
3360,4629,1971,612650,3701429,True
3408,4630,1971,609555,3699909,True
4032,4643,1971,608985,3713768,True


In [9]:
base_directory = 'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/new_active_only'

In [10]:
for i in range(len(unique_years)):
    #print(unique_years[i])
    year_df = df_active[df_active['year'] == unique_years[i]]
    year_graph = graf.imprv_threshold_graph(year_df, 75000)
    year_graph.write_adjacency(f'{base_directory}/max_dsp/{unique_years[i]}_max_dsp.adjacency')
    if ((i + 1) % 10) == 0:
        print(f'graph created for active leks in {unique_years[i]} at max dispersal threshold distance')
    elif (i + 1) == len(unique_years):
        print('graphs completed for given threshold')
    else:
        pass

for i in range(len(unique_years)):
    #print(unique_years[i])
    year_df = df_active[df_active['year'] == unique_years[i]]
    year_graph = graf.imprv_threshold_graph(year_df, 19000)
    year_graph.write_adjacency(f'{base_directory}/avg_dsp/{unique_years[i]}_avg_dsp.adjacency')
    if ((i + 1) % 10) == 0:
        print(f'graph created for active leks in {unique_years[i]} at average dispersal threshold distance')
    elif (i + 1) == len(unique_years):
        print('graphs completed for given threshold')
    else:
        pass

for i in range(len(unique_years)):
    #print(unique_years[i])
    year_df = df_active[df_active['year'] == unique_years[i]]
    year_graph = graf.imprv_threshold_graph(year_df, 13000)
    year_graph.write_adjacency(f'{base_directory}/rnd_trip/{unique_years[i]}_rnd_trip.adjacency')
    if ((i + 1) % 10) == 0:
        print(f'graph created for active leks in {unique_years[i]} at round trip threshold distance')
    elif (i + 1) == len(unique_years):
        print('graphs completed for given threshold')
    else:
        pass

for i in range(len(unique_years)):
    #print(unique_years[i])
    year_df = df_active[df_active['year'] == unique_years[i]]
    year_graph = graf.imprv_threshold_graph(year_df, 4800)
    year_graph.write_adjacency(f'{base_directory}/threshold_48/{unique_years[i]}_48.adjacency')
    if ((i + 1) % 10) == 0:
        print(f'graph created for active leks in {unique_years[i]} at 4800 meters threshold distance')
    elif (i + 1) == len(unique_years):
        print('graphs completed for given threshold')
    else:
        pass

for i in range(len(unique_years)):
    #print(unique_years[i])
    year_df = df_active[df_active['year'] == unique_years[i]]
    year_graph = graf.imprv_threshold_graph(year_df, 2000)
    year_graph.write_adjacency(f'{base_directory}/threshold_2/{unique_years[i]}_2.adjacency')
    if ((i + 1) % 10) == 0:
        print(f'graph created for active leks in {unique_years[i]} at 2000 meters threshold distance')
    elif (i + 1) == len(unique_years):
        print('graphs completed for given threshold')
    else:
        pass

for i in range(len(unique_years)):
    #print(unique_years[i])
    year_df = df_active[df_active['year'] == unique_years[i]]
    year_graph = graf.imprv_threshold_graph(year_df, 1500)
    year_graph.write_adjacency(f'{base_directory}/threshold_15/{unique_years[i]}_15.adjacency')
    if ((i + 1) % 10) == 0:
        print(f'graph created for active leks in {unique_years[i]} 1500 meters threshold distance')
    elif (i + 1) == len(unique_years):
        print('graphs completed for given threshold')
    else:
        pass

graph created for active leks in 1980 at max dispersal threshold distance
graph created for active leks in 1990 at max dispersal threshold distance
graph created for active leks in 2000 at max dispersal threshold distance
graph created for active leks in 2010 at max dispersal threshold distance
graphs completed for given threshold
graph created for active leks in 1980 at average dispersal threshold distance
graph created for active leks in 1990 at average dispersal threshold distance
graph created for active leks in 2000 at average dispersal threshold distance
graph created for active leks in 2010 at average dispersal threshold distance
graphs completed for given threshold
graph created for active leks in 1980 at round trip threshold distance
graph created for active leks in 1990 at round trip threshold distance
graph created for active leks in 2000 at round trip threshold distance
graph created for active leks in 2010 at round trip threshold distance
graphs completed for given thresho

Finding betweenness centrality per year

In [4]:
import igraph as ig
import os
import numpy as np
import pandas as pd

In [7]:
def node_metrics_df(given_directory, given_df):
    # graph_type - already have a list of keywords to look for
    types_list = ['avg', 'dsp', 'rnd', 'trip', '48', 'max', '15', '2']
    leks = [] # lek id
    # lek coordinates
    x_easting = []
    y_northing = []
    
    graph_type = [] # graph type
    years = [] # to easily find the year
    hub_scores = []
    centrality_scores = []
    
    ###########
    is_hub = []
    is_step_stone = []
    ###########



    adjacency_files = [f for f in os.listdir(f'{given_directory}') if f.endswith('.adjacency')]

    for adj_file in adjacency_files:
        file_path = f'{given_directory}/{adj_file}'
        path_parts = adj_file.split('.')
        parts = []
        for piece in path_parts:
            pieces = piece.split('_')
            parts.append(pieces)

        year = parts[0][0]
        year_df = given_df[given_df['year'] == int(year)]

        found_types = []
        for part in parts[0]:
            if part in types_list:
                found_types.append(part)
                
        if found_types:
            type_string = '_'.join(found_types)
            temp_list = [type_string]*len(year_df)
            graph_type.extend(temp_list)
        

        # appending year
        years_temp_list = year_df['year'].tolist()
        years.extend(years_temp_list)

        #for item in years_temp_list:
        #    years.append(years_temp_list)
        # appending lek
        leks_temp_list = year_df['lek_id'].tolist()
        #leks.extend(leks_temp_list)
        for lek in leks_temp_list:
            leks.append(lek)
        # appending easting
        easting_temp_list = year_df['x_easting'].tolist()
        x_easting.extend(easting_temp_list)
        # appending northing
        northing_temp_list = year_df['y_northing'].tolist()
        y_northing.extend(northing_temp_list)
        
        #Create the graph from the file
        graph = ig.Graph.Read_Adjacency(file_path)

        # Identify connected components
        components = graph.connected_components()

        # Initialize scores with NaN to identify nodes in disconnected components not processed
        hub_score_map = {v.index: float('nan') for v in graph.vs}
        centrality_map = {v.index: float('nan') for v in graph.vs}
        
        #############
        # Same process but getting ready to indicate if hub or not
        is_hub_map = {v.index: float('nan') for v in graph.vs}
        is_step_stone_map = {v.index: float('nan') for v in graph.vs}
        #############

        # Process each component separately
        for component in components:
            subgraph = graph.subgraph(component)

            # Compute hub scores and betweenness centrality for the subgraph
            subgraph_hub_scores = subgraph.hub_score()
            subgraph_centrality_scores = subgraph.betweenness()
            
            ###########
            subgraph_is_hub = []
            subgraph_is_step_stone = []

            if len(subgraph_hub_scores) > 2:
                subgraph_is_hub_temp = (subgraph_hub_scores >= np.quantile(subgraph_hub_scores, 0.9)).tolist()
                subgraph_is_step_stone_temp = (subgraph_centrality_scores >= np.quantile(subgraph_centrality_scores, 0.9)).tolist()
                subgraph_is_hub.extend(subgraph_is_hub_temp)
                subgraph_is_step_stone.extend(subgraph_is_step_stone_temp)
            else:
                subgraph_is_hub_temp = [False] * len(subgraph_hub_scores)
                subgraph_is_step_stone_temp = [False] * len(subgraph_centrality_scores)
                subgraph_is_hub.extend(subgraph_is_hub_temp)
                subgraph_is_step_stone.extend(subgraph_is_step_stone_temp)
            ###########

            # Map back to the original graph
            for i, node_index in enumerate(component):
                hub_score_map[node_index] = subgraph_hub_scores[i]
                centrality_map[node_index] = subgraph_centrality_scores[i]

                ###########
                is_hub_map[node_index] = subgraph_is_hub[i]
                is_step_stone_map[node_index] = subgraph_is_step_stone[i]
                ###########

        # Retrieve scores for each lek in the original graph order
        hub_scores_temp = [hub_score_map[i] for i in range(len(graph.vs))]
        #print(len(hub_scores))
        centrality_scores_temp = [centrality_map[i] for i in range(len(graph.vs))]
        #print(len(centrality_scores))

        ############
        hub_temp = [is_hub_map[i] for i in range(len(graph.vs))]

        step_stone_temp = [is_step_stone_map[i] for i in range(len(graph.vs))]
        ############

        # hub scores from be kleinberg's hub scores
        hub_scores.extend(hub_scores_temp)

        # stepping stones from betweenness centrality
        centrality_scores.extend(centrality_scores_temp)

        ############
        is_hub.extend(hub_temp)

        is_step_stone.extend(step_stone_temp)
        ############
    
    
    data = {'lek' : leks,
            'x_easting' : x_easting,
            'y_northing' : y_northing,
            'graph_type' : graph_type,
            'year' : years,
            'hub_score' : hub_scores,
            'centrality_score' : centrality_scores,
            'is_hub' : is_hub,
            'is_step_stone' : is_step_stone
            }
    df = pd.DataFrame(data)
    return df

In [8]:
# importing the data
df = pd.read_csv("E:/!!Research/!!!Data/graph_analysis/lek_data/cleaned_data/lek_data_binary_yearly_activity_w_tx.csv")
# dropping the column from the last time was exported
# Finding only leks active per year
df_active = df[df['activity'] == 'True']

graph_base_dir = 'E:/!!Research/!!!Data/graph_analysis/igraph_graphs/new_active_only'
# creating df for each type of graph
average_dispersal_df = node_metrics_df(f'{graph_base_dir}/avg_dsp', df_active)
round_trip_df = node_metrics_df(f'{graph_base_dir}/rnd_trip', df_active)
thresh48_df = node_metrics_df(f'{graph_base_dir}/threshold_48', df_active)
max_disperse_df = node_metrics_df(f'{graph_base_dir}/max_dsp', df_active)
thresh_2_df = node_metrics_df(f'{graph_base_dir}/threshold_2', df_active)
thresh_15_df = node_metrics_df(f'{graph_base_dir}/threshold_15', df_active)

# merging into final csv
frames = [max_disperse_df, average_dispersal_df, round_trip_df, thresh48_df, thresh_2_df, thresh_15_df]
frame_concat = pd.concat(frames)
frame_concat_rnd = frame_concat.round(3)
frame_concat_rnd.to_csv("E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/yearly_centrality_scores.csv", index = False)

Ranking stepping stones

In [13]:
import pandas as pd
csv_path = "E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/yearly_centrality_scores.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,lek,x_easting,y_northing,graph_type,year,hub_score,centrality_score,is_hub,is_step_stone
0,4624,608585,3702344,max_dsp,1971,1.0,0.0,True,True
1,4625,609605,3707214,max_dsp,1971,1.0,0.0,True,True
2,4626,610855,3710424,max_dsp,1971,1.0,0.0,True,True
3,4627,612215,3712342,max_dsp,1971,1.0,0.0,True,True
4,4628,612780,3702469,max_dsp,1971,1.0,0.0,True,True


In [15]:
df_bc_ad = df[df['graph_type'] == 'max_dsp']


# Count how many times each lek has been a hub, the number of active years, and exceedances of the 90th quantile
hub_counts_bc_ad = df_bc_ad.groupby(['lek', 'x_easting', 'y_northing']).agg(
    hub_count=('is_hub', 'sum'),
    step_stone_count = ('is_step_stone', 'sum'),
    active_years=('centrality_score', 'count'),  # Counts active years
).reset_index()


# Step 3: Save to CSV
hub_counts_bc_ad.to_csv('E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/max_dsp_summed_stepping_stones.csv', index=False)

hub_counts_bc_ad.tail()

Unnamed: 0,lek,x_easting,y_northing,hub_count,step_stone_count,active_years
1300,64850,668351,3860468,0,0,1
1301,66666,650336,3722341,0,0,6
1302,77777,651984,3722855,0,0,3
1303,88888,652426,3718903,0,0,2
1304,99999,649863,3714293,2,2,4


In [10]:
df_bc_ad = df[df['graph_type'] == 'avg_dsp']


# Count how many times each lek has been a hub, the number of active years, and exceedances of the 90th quantile
hub_counts_bc_ad = df_bc_ad.groupby(['lek', 'x_easting', 'y_northing']).agg(
    hub_count=('is_hub', 'sum'),
    step_stone_count = ('is_step_stone', 'sum'),
    active_years=('centrality_score', 'count'),  # Counts active years
).reset_index()


# Step 3: Save to CSV
hub_counts_bc_ad.to_csv('E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/avg_dsp_summed_stepping_stones.csv', index=False)

hub_counts_bc_ad.tail()

Unnamed: 0,lek,x_easting,y_northing,hub_count,step_stone_count,active_years
1300,64850,668351,3860468,0,0,1
1301,66666,650336,3722341,2,0,6
1302,77777,651984,3722855,0,0,3
1303,88888,652426,3718903,1,0,2
1304,99999,649863,3714293,1,1,4


In [16]:
df_bc_ad = df[df['graph_type'] == 'rnd_trip']


# Count how many times each lek has been a hub, the number of active years, and exceedances of the 90th quantile
hub_counts_bc_ad = df_bc_ad.groupby(['lek', 'x_easting', 'y_northing']).agg(
    hub_count=('is_hub', 'sum'),
    step_stone_count = ('is_step_stone', 'sum'),
    active_years=('centrality_score', 'count'),  # Counts active years
).reset_index()


# Step 3: Save to CSV
hub_counts_bc_ad.to_csv('E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/rnd_trip_summed_stepping_stones.csv', index=False)

hub_counts_bc_ad.tail()

Unnamed: 0,lek,x_easting,y_northing,hub_count,step_stone_count,active_years
1300,64850,668351,3860468,1,1,1
1301,66666,650336,3722341,2,0,6
1302,77777,651984,3722855,0,0,3
1303,88888,652426,3718903,1,0,2
1304,99999,649863,3714293,0,0,4


In [11]:
df_bc_ad = df[df['graph_type'] == '2']


# Count how many times each lek has been a hub, the number of active years, and exceedances of the 90th quantile
hub_counts_bc_ad = df_bc_ad.groupby(['lek', 'x_easting', 'y_northing']).agg(
    hub_count=('is_hub', 'sum'),
    step_stone_count = ('is_step_stone', 'sum'),
    active_years=('centrality_score', 'count'),  # Counts active years
).reset_index()


# Step 3: Save to CSV
hub_counts_bc_ad.to_csv('E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/thresh_2_summed_stepping_stones.csv', index=False)

hub_counts_bc_ad.tail()

Unnamed: 0,lek,x_easting,y_northing,hub_count,step_stone_count,active_years
1300,64850,668351,3860468,0,0,1
1301,66666,650336,3722341,1,1,6
1302,77777,651984,3722855,1,0,3
1303,88888,652426,3718903,0,0,2
1304,99999,649863,3714293,0,0,4


In [14]:
df_bc_ad = df[df['graph_type'] == '15']


# Count how many times each lek has been a hub, the number of active years, and exceedances of the 90th quantile
hub_counts_bc_ad = df_bc_ad.groupby(['lek', 'x_easting', 'y_northing']).agg(
    hub_count=('is_hub', 'sum'),
    step_stone_count = ('is_step_stone', 'sum'),
    active_years=('centrality_score', 'count'),  # Counts active years
).reset_index()


# Step 3: Save to CSV
hub_counts_bc_ad.to_csv('E:/!!Research/!!!Data/graph_analysis/igraph_metrics_data/all_leks_metrics/redone/thresh_15_summed_stepping_stones.csv', index=False)

hub_counts_bc_ad.tail()

Unnamed: 0,lek,x_easting,y_northing,hub_count,step_stone_count,active_years
1300,64850,668351,3860468,0,0,1
1301,66666,650336,3722341,1,1,6
1302,77777,651984,3722855,1,0,3
1303,88888,652426,3718903,1,1,2
1304,99999,649863,3714293,0,0,4
