In [1]:
import pandas as pd

In [13]:
data = "E:/!!Research/!!!Data/graph_analysis/lek_data/lek_data_binary_rolling_activity.csv"
df = pd.read_csv(data)
df.head()

Unnamed: 0.1,Unnamed: 0,lek_id,year,x_easting,y_northing,activity,active_last_5_years
0,0,2,1971,672527,3730500,not surveyed,Inactive or not surveyed
1,1,2,1972,672527,3730500,not surveyed,Inactive or not surveyed
2,2,2,1973,672527,3730500,not surveyed,Inactive or not surveyed
3,3,2,1974,672527,3730500,not surveyed,Inactive or not surveyed
4,4,2,1975,672527,3730500,not surveyed,Inactive or not surveyed


In [16]:
# Group by 'lek_id' and aggregate data for each lek
summary = df.groupby('lek_id').agg(
    y_northing=('y_northing', 'first'),  # Retain the first northings value for each lek
    x_easting=('x_easting', 'first'),    # Retain the first eastings value for each lek
    active_count=('activity', lambda x: (x == 'TRUE').sum()),
    inactive_count=('activity', lambda x: (x == 'FALSE').sum()),
    not_surveyed_count=('activity', lambda x: (x == 'not surveyed').sum()),
    total_years_surveyed=('activity', lambda x: (x != 'not surveyed').sum()),
    #years_between_surveys=('year', lambda x: (x.diff().dropna().mean() if len(x.dropna()) > 1 else None)),
    active_percent=('activity', lambda x: (x == 'TRUE').sum() / (x != 'not surveyed').sum() * 100 if (x != 'not surveyed').sum() > 0 else 0),
    inactive_percent=('activity', lambda x: (x == 'FALSE').sum() / (x != 'not surveyed').sum() * 100 if (x != 'not surveyed').sum() > 0 else 0),
    active_percent_non_surveyed_years=('activity', lambda x: (x == 'TRUE').sum() / ((x == 'FALSE').sum() + (x == 'not surveyed').sum()) * 100 if ((x == 'FALSE').sum() + (x == 'not surveyed').sum()) > 0 else 0),
    inactive_percent_non_surveyed_years=('activity', lambda x: (x == 'FALSE').sum() / ((x == 'TRUE').sum() + (x == 'not surveyed').sum()) * 100 if ((x == 'TRUE').sum() + (x == 'not surveyed').sum()) > 0 else 0)
).reset_index()

summary = summary.round(2)

summary.head()

Unnamed: 0,lek_id,y_northing,x_easting,active_count,inactive_count,not_surveyed_count,total_years_surveyed,active_percent,inactive_percent,active_percent_non_surveyed_years,inactive_percent_non_surveyed_years
0,2,3730500,672527,4,0,44,4,100.0,0.0,9.09,0.0
1,4,3729786,669625,4,0,44,4,100.0,0.0,9.09,0.0
2,5,3731230,670774,4,0,44,4,100.0,0.0,9.09,0.0
3,6,3727387,673996,4,0,44,4,100.0,0.0,9.09,0.0
4,7,3728666,675163,4,0,44,4,100.0,0.0,9.09,0.0


In [17]:
# Save to CSV
summary.to_csv('E:/!!Research/!!!Data/graph_analysis/lek_data/lek_data_binary_stats.csv', index=False)

Looking at a tally of how many leks were added per year

In [33]:
df_2 = pd.read_csv(data)

# Step 1: Get the unique leks surveyed each year, keeping track of cumulative unique leks
unique_leks_surveyed = set()
cumulative_leks_surveyed = []
surveyed_counts_per_year = []
percent_surveyed_per_year = []
leks_not_surveyed = []

for year, group in df_2.groupby('year'):
    yearly_unique_leks = set(group[group['activity'] != 'not surveyed']['lek_id'])
    new_unique_leks = yearly_unique_leks - unique_leks_surveyed  # Only new leks this year
    unique_leks_surveyed.update(new_unique_leks)
    
    cumulative_count = len(unique_leks_surveyed)
    surveyed_this_year = len(yearly_unique_leks)
    
    cumulative_leks_surveyed.append(cumulative_count)
    surveyed_counts_per_year.append(surveyed_this_year)
    
    # Calculate the percent surveyed relative to the cumulative total up to this year
    percent_surveyed_per_year.append((surveyed_this_year / cumulative_count) * 100)
    
    # Calculate leks not surveyed as the difference between cumulative leks and surveyed this year
    leks_not_surveyed.append(cumulative_count - surveyed_this_year)

# Step 2: Create DataFrame with cumulative count and percent surveyed
summary_df = pd.DataFrame({
    'year': sorted(df_2['year'].unique()),
    'leks_surveyed_this_year': surveyed_counts_per_year,
    'cumulative_leks_surveyed': cumulative_leks_surveyed,
    'percent_surveyed': percent_surveyed_per_year,
    'leks_not_surveyed': leks_not_surveyed
})

# Step 3: Count active, inactive, and not surveyed leks per year
status_counts = df_2.groupby(['year', 'activity']).size().unstack(fill_value=0)

# Step 4: Merge status counts into the summary DataFrame
summary_df = summary_df.merge(status_counts, on='year', how='left').fillna(0)

# Rename status columns for clarity
summary_df = summary_df.rename(columns={
    'active': 'leks_active',
    'inactive': 'leks_inactive',
    'not surveyed': 'leks_not_surveyed_for_tally'
})

summary_df = summary_df.round(2)

summary_df.to_csv('E:/!!Research/!!!Data/graph_analysis/lek_data/lek_data_survey_tally.csv', index=False)

summary_df.head()

Unnamed: 0,year,leks_surveyed_this_year,cumulative_leks_surveyed,percent_surveyed,leks_not_surveyed,FALSE,TRUE,leks_not_surveyed_for_tally
0,1971,21,21,100.0,0,0,21,533
1,1972,17,23,73.91,6,1,16,537
2,1973,0,23,0.0,23,0,0,554
3,1974,23,24,95.83,1,11,12,531
4,1975,29,31,93.55,2,11,18,525
