# CASA0009: Group Assignment (Part 1)

__Topic: An Investigation into the Biodiversity of England__

This notebook contains the code for the analysis in Part 1: Overview of Biodiversity Levels in England.

In [1]:
# Load libraries

import pandas as pd
import geopandas as gpd
import numpy as np
import math

In [2]:
# Create function to compute number of occurrences, species richness and species evenness (ref: https://sciencing.com/calculate-species-evenness-2851.html)
def cal_indicators(df):
    """Compute occurrence count, species richness and species evenness.

    Parameters
    ----------
    df : pandas.DataFrame
        Occurrence records, one row per occurrence, with a ``speciesKey``
        column identifying the species of each record.

    Returns
    -------
    tuple
        ``(occ, sp_richness, sp_evenness)`` where

        * ``occ`` is the number of rows in ``df``;
        * ``sp_richness`` is the number of distinct, non-missing species keys;
        * ``sp_evenness`` is the Shannon diversity index H divided by
          ln(sp_richness). It is 0 when fewer than two species are present
          (including an empty ``df``), because ln(0) is undefined and
          ln(1) == 0 would give a division by zero.
    """
    # Compute number of occurrences
    occ = len(df)

    # Create frequency table of species. size() counts rows per species
    # directly, so the result does not depend on any other column being
    # populated; rows with a missing speciesKey are left out by groupby.
    counts = df.groupby('speciesKey').size()
    # Guard against zero-count groups (e.g. unobserved categories), which
    # would otherwise produce ln(0)
    counts = counts[counts > 0]

    # Compute species richness
    sp_richness = len(counts)

    # Evenness is undefined for zero or one species; report 0 in both cases
    if sp_richness <= 1:
        return occ, sp_richness, 0

    # Compute proportion of each species among the records that have a species
    proportions = counts / counts.sum()

    # Compute Shannon diversity index: H = -sum(P x ln P)
    H = -float((proportions * np.log(proportions)).sum())

    # Compute species evenness
    sp_evenness = H / math.log(sp_richness)

    return occ, sp_richness, sp_evenness

## Load boundaries of Local Authorities

In [3]:
# Read the England Local Authority (LA) boundary geometries from the shapefile
# into a GeoDataFrame; later steps rely on its LAD21NM column and geometry
eng_LA = gpd.read_file('Data/England_LA/eng_LA.shp')

In [4]:
# Re-project to British National Grid (EPSG:27700), a projected CRS, so that
# areas are computed in projected units rather than in degrees
eng_LA = eng_LA.to_crs('epsg:27700')

In [5]:
# Compute the area of each LA in the units of the current CRS.
# Bracket assignment is used because attribute-style assignment
# (eng_LA.SHAPE_Area = ...) only updates a column that already exists and
# otherwise just sets a plain Python attribute instead of creating the column.
eng_LA['SHAPE_Area'] = eng_LA.area

# Keep the areas as a plain list, in eng_LA row order
area_list = eng_LA['SHAPE_Area'].tolist()

In [6]:
# Re-project back to WGS84 (EPSG:4326) because Plotly uses WGS84; the
# occurrence points built later are also created in EPSG:4326, so both sides
# of the spatial join share the same CRS
eng_LA = eng_LA.to_crs('epsg:4326')

## Compute for Each Year

In [7]:
# Set columns to import from file (passed to pd.read_csv through usecols).
# In the code below, occurrenceStatus and speciesKey are used for filtering,
# decimalLongitude/decimalLatitude to build point geometries, and gbifID
# for counting records per species.

col_name = ['gbifID', 
            'occurrenceStatus',
            'decimalLatitude', 
            'decimalLongitude',
            'month', 
            'year', 
            'speciesKey', 
            'issue']

In [8]:
# Years to analyse: 2011 to 2021 inclusive, kept as strings because they are
# spliced directly into the input and output file names
years = [str(y) for y in range(2011, 2022)]


In [9]:
# Column layout shared by every per-year result table and the combined table:
# LA name, year, occurrence density, species richness and species evenness
output_col = ['LAD21NM', 'year', 'occ_den', 'sp_richness', 'sp_evenness']
# Create empty dataframe to hold combined results
df_final = pd.DataFrame(columns=output_col)

In [None]:
# Compute the indicators for every Local Authority District (LAD), one year at
# a time. Each year's table is written to its own CSV and also collected so
# that the combined table can be built with a single concat after the loop.
yearly_frames = []

for year in years:

    print(f'Analyzing {year}')

    # Set data path
    data_path = f'Data/Part1Data/England_{year}.zip'

    # Import data - occurrence data downloaded from GBIF with the following search parameters:
    # Administrative areas (gadm.org): GBR.1_1 (i.e. England)
    # Occurrence status: present
    # Year: Between start of 20xx and end of 20xx (between 2011 and 2021)
    pdf = pd.read_csv(data_path, compression='zip', sep='\t', low_memory=False, usecols=col_name)

    # Issue (1): Keep only records whose occurrence status is 'PRESENT'
    pdf = pdf[pdf.occurrenceStatus == 'PRESENT']

    # Issue (2): Drop records without a species key.
    # Reassign instead of using inplace=True: pdf is a filtered slice, and
    # modifying a slice in place can trigger pandas' SettingWithCopyWarning.
    pdf = pdf.dropna(axis=0, subset=['speciesKey'])

    # Convert to geopandas dataframe with one point per occurrence (WGS84)
    points = gpd.points_from_xy(pdf['decimalLongitude'], pdf['decimalLatitude'], crs='epsg:4326')
    gdf = gpd.GeoDataFrame(pdf, geometry=points)

    # Spatial join to match each occurrence to the LAD that contains it.
    # 'predicate' replaces the 'op' keyword, which was deprecated in
    # geopandas 0.10 and later removed.
    gdf = gpd.sjoin(gdf, eng_LA, predicate='within')

    # Create lists to hold LADs, occurrence density, species richness and species evenness
    lads = eng_LA.LAD21NM.tolist()
    occurrences_density = []
    species_richness = []
    species_evenness = []
    year_list = [year] * len(lads)

    # Compute indicators (occurrences density, species richness and species evenness) for each LAD.
    # area_list was built from eng_LA, so it is paired with lads by position.
    for lad, area in zip(lads, area_list):

        # Segment out the occurrences that fall in this LAD
        df = gdf[gdf["LAD21NM"] == lad]

        # Compute indicators
        occ, sp_richness, sp_evenness = cal_indicators(df)

        # Normalise the counts by LAD area; evenness is already a ratio
        occurrences_density.append(occ / area)
        species_richness.append(sp_richness / area)
        species_evenness.append(sp_evenness)

    # Create dataframe
    df_ind = pd.DataFrame(list(zip(lads, year_list, occurrences_density, species_richness, species_evenness)),
                          columns=output_col)

    # Save output
    out_path = f'Output/Part1/Yearly/lad_ind_{year}.csv'
    df_ind.to_csv(out_path, index=False)

    # Keep this year's results for the combined table
    yearly_frames.append(df_ind)

# Build the combined results in one go. This avoids repeatedly concatenating
# onto an empty frame and gives the same table if this cell is run again.
if yearly_frames:
    df_final = pd.concat(yearly_frames, ignore_index=True)

print('Done!')

Analyzing 2011
Analyzing 2012
Analyzing 2013
Analyzing 2014
Analyzing 2015
Analyzing 2016
Analyzing 2017
Analyzing 2018
Analyzing 2019
Analyzing 2020
Analyzing 2021


In [None]:
# Display the combined results table (all LADs, all years) as the cell output
df_final

In [None]:
# Save the combined results (all LADs, all years) to a single CSV,
# without writing the dataframe index as a column
df_final.to_csv('Output/Part1/LA_ind.csv', index=False)