### Exponential Dispersal Function

&nbsp;

An exponential dispersal function describes the spatiotemporal dispersal of wildlife. The exponential function represents how the density decreases as you move away from its source. The model in this script is listed below.

$$Y = p \times \mathrm{e}^{-\frac{X}{b}}$$

where

Y denotes the probability of dispersal at a distance X from the source

X denotes the distance from the source

p denotes the proportion which does not disperse and stays at the source (X = 0)

b denotes the average dispersal distance

In [1]:
import os
import pandas as pd
from pyproj import Proj
import math
import pyodbc
import itertools
from scipy.optimize import curve_fit
import numpy as np
os.chdir('C:/Users/tm/Downloads/utas/WildlifeDatabases')

### funcs

In [2]:
#gompertz curve equation
def dispersal_func(X,p,b):
    return p*np.exp(-X/b)

#using mle to estimate
def get_func_params(x,y):
    popt,pcov=curve_fit(dispersal_func,x,y,p0=(0.5,np.mean(x)))
    return popt

In [3]:
#coordinates conversion
def convert_easting_northing_to_lat_lon(easting, northing,):
    proj = Proj('+proj=utm +zone=55 +south +ellps=GRS80 +units=m +no_defs')
    lon, lat = proj(easting, northing, inverse=True)
    return lat, lon

#spherical distance computation by chatgpt
def haversine_distance(lat1, lon1, lat2, lon2):
    
    # Convert degrees to radians
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)

    # Earth's radius in kilometers
    radius = 6371

    # Difference in latitudes and longitudes
    delta_lat = lat2_rad - lat1_rad
    delta_lon = lon2_rad - lon1_rad

    # Haversine formula
    a = math.sin(delta_lat / 2) ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # Calculate distance
    distance = radius * c

    return distance


### read files

In [4]:
# ['./arthur river/ARV_database_2022_11_CS.accdb',
#  './black river/BRI_database_2022_11_CS.accdb',
#  './freycinet/FNP_database_2022_11_KJS.accdb',
#  './takone/TKN_database_2022_11_KJS.accdb',
#  './west pencil pine/WPP_September_2022_11_KJS.accdb',
#  './wukalina/Wukalina_NOV2020_SN.accdb',
# './crabtree/CBT_database_2022_11_KJS.accdb','./franklin/FRA_database_devil_2022_11_KJS.accdb']

In [5]:
traphist=pd.DataFrame(columns=['ID', 'TrapID', 'DateOfUse', 'Event', 'CaptureType', 'Microchip',
       'Comment'])

traps=pd.DataFrame(columns=['TrapID', 'Northing', 'Easting', 'Comments', 'TrapLine', 'Researcher'])

for i in [
         './woodbridge sandfly/Channel_database_devil_2022_06_KJS.accdb',
        ]:

    conn = pyodbc.connect(r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ='+f'{i};')
    traps=pd.concat([traps,pd.read_sql('select * from traps',conn)])
    traphist=pd.concat([traphist,pd.read_sql('select * from traphistory',conn)])

  traps=pd.concat([traps,pd.read_sql('select * from traps',conn)])
  traphist=pd.concat([traphist,pd.read_sql('select * from traphistory',conn)])


### cleanse

In [6]:
#focus on recapture
traphist=traphist[traphist['Event'].isin(['Tasmanian devil','Tasmanian Devil',])].copy()
target_ids=traphist['Microchip'][traphist['CaptureType']=='Recapture'].unique()
traphist=traphist[traphist['Microchip'].isin(target_ids)]
traps=traps[['TrapID','Northing', 'Easting']].drop_duplicates()

In [7]:
#merge trap info
grande=traphist.merge(traps,on='TrapID',how='left')

#remove duplicates
grande=grande.loc[grande['Northing'].dropna().index]
grande=grande.loc[grande['Microchip'].dropna().index]
grande=grande.drop_duplicates()
grande=grande.sort_values(['Microchip',"DateOfUse"])
grande.reset_index(inplace=True,drop=True)
grande=grande.loc[grande[['DateOfUse','Microchip']].drop_duplicates().index]

In [8]:
#only preserves animals that have been recaptured more than 300 days apart
target_ids=[]
for i in grande['Microchip'].unique():
    subset=grande[grande['Microchip']==i].copy()
    if len(subset)==1:
        continue
    counting=(subset['DateOfUse'].iloc[-1]-subset['DateOfUse'].iloc[0]).days
    if counting<300:
        continue
    target_ids.append(i)
    
#filter microchips
grande=grande[grande['Microchip'].isin(target_ids)]

In [9]:
#cleanse coordinates
grande['Northing']=grande['Northing'].str.replace('5224370.09\r\n5224370.09','5224370.09')
grande['Northing']=grande['Northing'].str.replace('0\r\n0','0')
grande['Northing']=grande['Northing'].astype(float)

grande['Easting']=grande['Easting'].str.replace('605373\r\n605373','605373')
grande['Easting']=grande['Easting'].str.replace('0\r\n0','0')
grande['Easting']=grande['Easting'].astype(float)

In [10]:
#convert coordinates
grande['lat'],grande['lon']=convert_easting_northing_to_lat_lon(grande['Easting'], grande['Northing'])

In [11]:
#get year and label
grande['year']=grande['DateOfUse'].dt.year

In [12]:
#prepare for trip identification
grande=grande.sort_values('DateOfUse')
grande['dif']=grande['DateOfUse'].diff().apply(lambda x:x.days)
grande.reset_index(inplace=True,drop=True)
grande['cumsum']=grande['dif'].cumsum()

In [13]:
#differentiate each trip
tripnum=0
arr=[]
for i in grande.index:
    if grande['cumsum'].loc[i]<=10:
        arr.append(tripnum)
    else:
        tripnum+=1
        grande['dif'].loc[:i]=0
        grande['cumsum']=grande['dif'].cumsum()
        arr.append(tripnum)
grande['tripnum']=arr

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grande['dif'].loc[:i]=0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grande['dif'].loc[:i]=0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grande['dif'].loc[:i]=0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grande['dif'].loc[:i]=0
A value is trying to be set on a copy of a slice from a DataFrame

See t

In [14]:
#assign trip num for each year
lastyear=1999
lasttrip=1
tripnum=0
arr=[]
for i in grande.index:
    if grande['tripnum'].loc[i]==lasttrip:
        pass
    else:
        lasttrip+=1
        if grande['year'].loc[i]==lastyear:
            tripnum+=1
        else:
            lastyear+=1
            tripnum=0
    arr.append(tripnum)   
grande['year-trip']=arr

In [15]:
#design trip name
grande['trip name']=grande['year'].astype(str)+' trip'+grande['year-trip'].astype(str)

In [16]:
#get first and last date for trip name
triprange=grande.groupby('tripnum').first()['DateOfUse'].astype(str)+' to '+grande.groupby('tripnum').last()['DateOfUse'].astype(str)
mapping=dict(zip(grande['tripnum'].unique(),triprange))
grande['trip range']=grande['tripnum'].apply(lambda x:mapping[x])

### compute X

In [17]:
#compute the distance of annual dispersal
output=pd.DataFrame(columns=['trip id', 'trip range', 'devil id', 'distance','trip coordinates'])
freq=grande.copy()

for i in freq['Microchip'].unique():
    subset=freq[freq['Microchip']==i].copy()
    combs=list(itertools.combinations(subset['DateOfUse'],2))

    #for each trip, only takes the first date of captured
    for ind,val in enumerate(combs):
        dif=(val[1]-val[0]).days
        if dif>330 and dif<390:
            id0=subset['tripnum'][subset['DateOfUse']==val[0]].iloc[0]
            id1=subset['tripnum'][subset['DateOfUse']==val[1]].iloc[0]
            combs[ind]=[str(id0)+'-'+str(id1),val]            
    result=[j for j in combs if type(j)==list]
    
    dataset=pd.DataFrame(result,columns=['trip id','trip range'])
    dataset=dataset.loc[dataset['trip id'].drop_duplicates().index]
    dataset['devil id']=i

    #compute spherical distance
    arr1=[]
    arr2=[]
    arr3=[]
    for k in dataset.index:
        startdate=dataset['trip range'].loc[k][0]
        enddate=dataset['trip range'].loc[k][1]

        lat1=subset['lat'][subset['DateOfUse']==startdate].iloc[0]
        lon1=subset['lon'][subset['DateOfUse']==startdate].iloc[0]
        lat2=subset['lat'][subset['DateOfUse']==enddate].iloc[0]
        lon2=subset['lon'][subset['DateOfUse']==enddate].iloc[0]
        

        arr1.append(haversine_distance(lat1, lon1, lat2, lon2))
        arr2.append((lat1, lon1, lat2, lon2))
    dataset['distance']=arr1
    dataset['trip coordinates']=arr2
    
    output=pd.concat([output,dataset])

output.reset_index(inplace=True,drop=True)

In [18]:
#remove coordinates error
output=output[output['distance']<50]

### compute Y

In [19]:
#group distance for probability computation
output['grid dist']=output['distance'].apply(lambda x:round(x,0))

In [20]:
#compute the probability of distance
prob=output.groupby(['grid dist']).count()[['trip id']]
prob['trip id']=prob['trip id']/prob['trip id'].sum()
prob.columns=['prob']
prob.reset_index(inplace=True)

In [21]:
#merge
output=output.merge(prob,on='grid dist',how='left')

In [22]:
#convert km to m
output['distance']*=1000

In [23]:
#result
get_func_params(output['distance'].tolist(),output['prob'].tolist())

array([3.30737883e-01, 9.81105178e+03])