# Summary

Key findings from the analysis, presented in a journalist-friendly format.

In [1]:
# Put the current directory first on the import path so the project-local
# `_shared` module resolves.
import sys
sys.path.insert(0, ".")
from _shared import *  # presumably supplies pd, CACHE_DIR, setup_plotting, ... — verify in _shared
setup_plotting()

# Tabular inputs, read from parquet files under CACHE_DIR.
india_df = pd.read_parquet(CACHE_DIR / "india_df.parquet")
india_splot = pd.read_parquet(CACHE_DIR / "india_splot.parquet")
country_counts_frame = pd.read_parquet(CACHE_DIR / "global_country_counts.parquet")
global_country_counts = country_counts_frame["n_records"]  # keep only the count column (a Series)

# Metadata files are read as Series (typ="series") with date conversion
# switched off, so values are taken exactly as stored.
gbif_meta = pd.read_json(CACHE_DIR / "metadata.json", typ="series", convert_dates=False)
total_global = int(gbif_meta["total_global"])
splot_meta = pd.read_json(CACHE_DIR / "splot_metadata.json", typ="series", convert_dates=False)

In [2]:
# Rebuild the derived tables and scalars that the printed summary relies on.

# Inner join: keep only the countries present in both the reference table and
# the global record counts, then express the record count relative to
# population, area and estimated plant species.
joined = COUNTRY_REF.join(global_country_counts, how="inner")
comparison = joined.assign(
    records_per_million_pop=joined["n_records"] / joined["population_m"],
    records_per_mkm2=joined["n_records"] / joined["area_mkm2"],
    records_per_species=joined["n_records"] / joined["est_plant_species"],
)
india_row = comparison.loc["IN"]  # raises KeyError if "IN" did not survive the join

# Per-state record count and distinct-species count, largest state first.
by_state = india_df.groupby("stateprovince")
state_counts = by_state.agg(
    n_records=("species", "size"),
    n_species=("species", "nunique"),
).sort_values(by="n_records", ascending=False)

# Distinct sPlot plots in the India subset versus the total stored in the metadata.
india_splot_plots = india_splot["PlotObservationID"].nunique()
total_splot_plots = int(splot_meta["total_splot_plots"])

# Species found in sPlot but absent from the GBIF records.
# Both name sets are lower-cased before comparing so the set difference is
# case-insensitive. Lower-casing only the GBIF side would mean any sPlot name
# containing an upper-case letter could never match, inflating `splot_only`.
gbif_species = set(india_df.species.dropna().str.lower().unique())
splot_species = set(india_splot.speciesname.dropna().str.lower().unique())
splot_only = splot_species - gbif_species

# Boolean mask: True for rows whose dataset key equals INAT_DATASET_KEY.
is_inat = india_df.datasetkey == INAT_DATASET_KEY

# Collect the report as a list of lines (an empty string is a blank line) and
# write it out with a single print call, one element per output line.
summary_lines = [
    "=" * 60,
    "SUMMARY: India's Contribution to GBIF Citizen Science",
    "=" * 60,
    "",
    f"Total India GBIF records: {len(india_df):,}",
    f"Global share: {len(india_df) / total_global:.2%}",
    f"Unique species (GBIF): {india_df.species.nunique():,}",
    "",
    "--- Normalization ---",
    f"Per million people: {india_row.records_per_million_pop:,.0f}",
    f"Per million km\u00b2: {india_row.records_per_mkm2:,.0f}",
    f"Per known species: {india_row.records_per_species:,.1f}",
    "",
    "--- Key biases ---",
    f"iNaturalist share: {is_inat.mean():.1%}",
    f"Human observation share: {(india_df.basisofrecord == 'HUMAN_OBSERVATION').mean():.1%}",
    # state_counts is sorted by n_records descending, so row 0 is the top state.
    f"Top state: {state_counts.index[0]} ({state_counts.n_records.iloc[0]:,} records)",
    "",
    "--- sPlot vegetation surveys ---",
    f"India sPlot plots: {india_splot_plots:,} ({india_splot_plots/total_splot_plots:.2%} of global)",
    f"sPlot adds {len(splot_only):,} species not in GBIF",
    "",
]
print(*summary_lines, sep="\n")
print("--- Growth ---")
# Number of records per year for 2020-2024; rows without a year are dropped
# first, and the year is cast to int so the index holds integer labels.
recent_years = (
    india_df.dropna(subset=["year"])
    .query("2020 <= year <= 2024")
    .assign(year=lambda x: x.year.astype(int))
    .groupby("year")
    .size()
)
if len(recent_years) > 0:
    # Format the 2024 count only when it exists: applying the "," format spec
    # to the string fallback "N/A" raises ValueError, so the placeholder is
    # substituted after formatting rather than passed through the spec.
    records_2024 = recent_years.get(2024)
    records_2024_label = "N/A" if records_2024 is None else f"{records_2024:,}"
    print(f"Records in most recent full year (2024): ~{records_2024_label}")
    # NOTE(review): this statement is hard-coded, not derived from recent_years.
    print("Fastest growth period: last 5 years")

SUMMARY: India's Contribution to GBIF Citizen Science

Total India GBIF records: 200,175
Global share: 0.06%
Unique species (GBIF): 8,148

--- Normalization ---
Per million people: 140
Per million km²: 60,899
Per known species: 11.1

--- Key biases ---
iNaturalist share: 53.0%
Human observation share: 77.9%
Top state: Tamil Nadu (27,194 records)

--- sPlot vegetation surveys ---
India sPlot plots: 5,489 (0.25% of global)
sPlot adds 535 species not in GBIF

--- Growth ---
Records in most recent full year (2024): ~25,995
Fastest growth period: last 5 years
