In [3]:
import datetime
import math
from statistics import mode

import numpy as np
import pandas as pd
import pyproj
import scipy as sp
import scipy.special  # guarantees sp.special is loaded regardless of SciPy version
import torch
import utm
from mpmath import besseli

## Goto wind estimation

This method originates in the [2017 paper](https://www.science.org/doi/10.1126/sciadv.1700097) and was originally written in R. Here, we translate it to Python and try to improve run speeds as well. The outputs of the two implementations will be compared to ensure that the translation does not change the results.

In [4]:
# Settings for the wind-estimation model.
samplingInterval = 60  # presumably seconds between fixes (the notes assume 1 fix per minute) — TODO confirm
timeWindow = 51        # length of an estimation window, in minutes (per the notes below)
cutlength = 45         # number of samples a window needs before it is used
cutv = 4.1667          # track-speed threshold (m/s per the notes); slower samples are discarded
constv = 34.7/3.6      # assumed mean air speed; the /3.6 looks like a km/h -> m/s conversion — TODO confirm

def Likelihoodww(data1,data2,cv): # calculate log-likelihood of the model
    """Build the log-likelihood function of the wind/heading model.

    Parameters
    ----------
    data1 : sequence of float
        Track (ground) speeds, one per sample.
    data2 : sequence of float
        Track directions in radians, aligned with ``data1``.
    cv : float
        Mean air speed. It fixes the Weibull scale ``b`` for a given shape
        ``a`` through ``b = cv / gamma(1 + 1/a)``.

    Returns
    -------
    callable
        ``f(par)`` with ``par = [a, mx, my, wx, wy]`` (0-based): Weibull
        shape, x/y components of the von Mises concentration vector, and x/y
        wind components. ``f`` returns the summed log-likelihood as a float.
        The value is *not* negated, so hand ``lambda p: -f(p)`` to a minimiser.
        Invalid parameters (e.g. ``a <= 0``) produce nan/inf instead of raising.
    """
    speed = np.asarray(data1, dtype=float)
    direction = np.asarray(data2, dtype=float)
    # Ground-velocity components are independent of the parameters: compute once.
    ground_x = speed*np.cos(direction)
    ground_y = speed*np.sin(direction)

    def f(par):
        a = par[0]
        b = cv/sp.special.gamma(1+1/a)
        mx = par[1]
        my = par[2]
        wx = par[3]
        wy = par[4]
        # Air velocity = ground velocity - wind; rr is the air speed and
        # (rx, ry) the unit heading vector.
        air_x = ground_x - wx
        air_y = ground_y - wy
        rr = np.sqrt(air_x**2 + air_y**2)
        rx = air_x/rr
        ry = air_y/rr
        kappa = np.sqrt(mx**2 + my**2)
        # log(I0(kappa)) via the exponentially scaled Bessel function so that
        # large concentrations do not overflow.
        log_i0 = np.log(sp.special.i0e(kappa)) + kappa
        lp = ((a-2)*np.log(rr) - (rr/b)**a + mx*rx + my*ry
              + np.log(a) - np.log(b) + (1-a)*np.log(b) - log_i0)
        return float(np.sum(lp))

    return f

def Weibull_sd(a,b): # standard deviation of Weibull distribution
    """Standard deviation of a Weibull distribution.

    Parameters
    ----------
    a : float or array-like
        Shape parameter.
    b : float or array-like
        Scale parameter.

    Returns
    -------
    float or ndarray
        ``b * sqrt(gamma(1 + 2/a) - gamma(1 + 1/a)**2)``.
    """
    # The gamma function lives in scipy.special; there is no top-level sp.gamma.
    first_moment = sp.special.gamma(1+1/a)
    second_moment = sp.special.gamma(1+2/a)
    return b*np.sqrt(second_moment - first_moment*first_moment)

def Weibull_mean(a,b): # mean of Weibull distribution
    """Mean of a Weibull distribution.

    Parameters
    ----------
    a : float or array-like
        Shape parameter.
    b : float or array-like
        Scale parameter.

    Returns
    -------
    float or ndarray
        ``b * gamma(1 + 1/a)``.
    """
    # The gamma function lives in scipy.special; there is no top-level sp.gamma.
    return b*sp.special.gamma(1+1/a)

def Von_Mises_sd(kappa): # standard deviation of von Mises distribution
    """Return ``1 / sqrt(kappa)`` for the concentration parameter ``kappa``.

    Accepts a scalar or an array; the result has the same shape.
    """
    root_kappa = np.sqrt(kappa)
    return 1/root_kappa

def readAxyGPS(filename): # read in AxyTrek GPS data (txt files)
    """Load a tab-separated AxyTrek GPS export.

    Only the first four columns are read and they are named ``Date``,
    ``Time``, ``lat`` and ``lon``. A ``DT`` column is appended holding the
    timestamp parsed from ``Date`` and ``Time`` (day/month/year, 24-hour clock).

    Parameters
    ----------
    filename : str or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``Time``, ``lat``, ``lon``, ``DT``.
    """
    column_names = ['Date','Time','lat','lon']
    fixes = pd.read_csv(filename, sep = "\t", header = None,
                        usecols = [0,1,2,3], names = column_names)
    stamp_text = fixes['Date'] + " " + fixes['Time']
    return fixes.assign(DT = pd.to_datetime(stamp_text, format="%d/%m/%Y %H:%M:%S"))

def nearest(items, pivot): # find the nearest time position
    """Return the element of ``items`` closest to ``pivot``.

    Closeness is ``abs(item - pivot)``; on a tie the earliest element wins.
    An empty ``items`` raises ``ValueError`` (from ``min``).
    """
    def gap(candidate):
        return abs(candidate - pivot)

    return min(items, key=gap)

def timeRescale(dat,tdiff): # subsample rows so that fixes are roughly tdiff minutes apart
    """Thin ``dat`` to about one row every ``tdiff`` minutes.

    The stride is ``tdiff`` minutes divided by the most common interval
    between consecutive ``dat['DT']`` values. Rows are then picked by
    position (0, stride, 2*stride, ... truncated to integers), not by
    timestamp, so the result is regular only where the input is.

    Parameters
    ----------
    dat : pandas.DataFrame
        Must contain a datetime column ``DT``.
    tdiff : int
        Target spacing in minutes.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``dat`` with all columns.
    """
    modal_gap = np.timedelta64(mode(np.diff(dat['DT'])),'s')
    stride = np.timedelta64(tdiff,'m')/modal_gap
    picks = np.arange(0,len(dat),step=stride).astype(int)
    return dat.iloc[picks,:]

def spTrav(DT,lat,lon,threshold=0): # speed from time (DT), lat, and lon
    """Distance and speed between consecutive GPS fixes.

    Parameters
    ----------
    DT : array-like of datetime64
        Fix timestamps.
    lat, lon : array-like of float
        Fix coordinates in degrees, aligned with ``DT``.
    threshold : float, optional
        Maximum accepted speed in km/h. When non-zero, every fix reached at a
        speed above the threshold is dropped (its timestamp too) and the
        speeds are recomputed, repeating until no segment exceeds the
        threshold. ``0`` (default) disables the filter.

    Returns
    -------
    distance : numpy.ndarray
        Geodesic length of each segment on the WGS84 ellipsoid, in metres.
    speed : numpy.ndarray
        Segment speed in km/h. Both arrays have one entry per pair of
        consecutive (retained) fixes.
    """
    geod = pyproj.Geod(ellps='WGS84')
    times = np.asarray(DT, dtype='datetime64[ns]')
    lats = np.asarray(lat, dtype=float)
    lons = np.asarray(lon, dtype=float)

    def _segments(t, la, lo):
        # Pair fix i with fix i+1 so the result lines up with np.diff(t).
        if t.size < 2:
            return np.array([], dtype=float), np.array([], dtype=float)
        _, _, dist = geod.inv(lo[:-1], la[:-1], lo[1:], la[1:])
        dist = np.asarray(dist, dtype=float)
        hours = np.diff(t)/np.timedelta64(3600,'s')
        return dist, (dist*10**-3)/hours

    distance, speed = _segments(times, lats, lons)
    if threshold != 0:
        while speed.size and np.nanmax(speed) > threshold:
            # speed[i] describes the move that arrives at fix i+1, so that is
            # the fix to discard; the first fix has no incoming segment.
            keep = np.concatenate(([True], ~(speed > threshold)))
            times, lats, lons = times[keep], lats[keep], lons[keep]
            distance, speed = _segments(times, lats, lons)
    return distance, speed

def XYfromUTM(lat,lon):
    """Project latitude/longitude to UTM with ``utm.from_latlon``.

    The result of ``utm.from_latlon`` is returned unchanged; callers in this
    notebook unpack it as four values and use the first two as X and Y.
    """
    projected = utm.from_latlon(lat,lon)
    return projected

In [5]:
from sys import platform
import os, re, glob, pyproj, math, datetime
# Choose the data directory by platform: macOS ("darwin") uses the mounted
# Google Drive volume, every other platform falls through to the I: drive path.
# NOTE(review): the two paths differ in one folder name ("PhD" vs "PD") — confirm which is intended.
if platform == "darwin":
    fileloc = "/Volumes/GoogleDrive-112399531131798335686/My Drive/PhD/Data/2018Shearwater/AxyTrek/"
else:
    fileloc = "I:/My Drive/PD/Data/2018Shearwater/AxyTrek/"
# list all files
# NOTE: glob is called without recursive=True, so "**" behaves like "*" and
# only .txt files exactly one directory below fileloc are matched.
files = glob.glob(fileloc + "**/*.txt")
# Unique "AxyTrek/<digits-and-dashes>" path fragments, one per tag folder.
tags = np.unique([re.search('(AxyTrek[\\\\|/][0-9\-]+)[\\\\|/]',f).group(1) for f in files])
# NOTE(review): index 1 is the second file in glob order, which is not guaranteed
# to be stable across machines — confirm which file is meant.
dat = readAxyGPS(files[1]) # read in
minDat = timeRescale(dat,1) # convert to 1 min fs
# Seconds elapsed between consecutive resampled fixes, truncated to integers.
dt = (np.diff(minDat['DT']) / np.timedelta64(1,'s')).astype(int)

### Estimation system

Once the data are read in, the initial portion of the program deals with identifying suitable windows in which to run the estimation model. These windows are required to be (approximately) 51 minutes in length and, within those 51 minutes, to contain at least 45 samples, assuming a sampling frequency of 1 fix per minute. We assume there to be some error in the sampling interval. In the original study, this was taken as 5 seconds (i.e. we can expect samples to be 60 $\pm$ 5 seconds apart).

Starting from the first possible startpoint (half the window size in samples), the model then runs through the following processes:

1. Define window size.
   1. Find position of data which is 25.5 minutes after initial point.
   2. Repeat but for before initial point.
   3. Assign these positions as start and end of the window.
2. Create a new vector of track speed and direction where the speed is above the threshold of 4.1667 m/s, the sample is within 65 s of the previous sample, and direction is not equal to 100.

At this stage, the model starts to run through a variety of 'initial headings', set as each integer between -3 and 3. For each initial heading, the following processes are run:

1. Create variable `inita` as a random variable generated from a normal distribution with mean 12.5 and standard deviation 5. `inita` must be greater than 5.
2. Calculate the mean heading from all headings within the window that passed the above requirements.
3. The sum of the initial heading and the mean heading is determined, and using these data, the following variables are estimated:
   1. `kappa`: the concentration parameter for a von Mises distribution
   2. `mux` and `muy`: the x and y components of `kappa`
   3. `wx` and `wy`: the x and y components of wind (the track vector - the heading)
4. The `inita`, `mux`, `muy`, `wx`, and `wy` variables are then optimised by maximising the log-likelihood of the track speed and direction data, given a constant assumed mean air speed (34.7 km/h, i.e. `34.7/3.6` ≈ 9.64 m/s).
5. The standard deviation of the heading vector perpendicular to the mean direction `yoko` and the standard deviation of the heading vector along the mean direction `tate` are calculated.

If convergence is not reached but `tate` can be calculated, the process is repeated until convergence is reached.

In [6]:
# Project the resampled track so that displacements between fixes are planar distances.
X,Y,_,_ = XYfromUTM(np.array(minDat['lat']),np.array(minDat['lon']))
vg_x_obs = np.diff(X)
vg_y_obs = np.diff(Y)
# Ground speed (displacement per second) and direction (radians) of every segment.
track_speed = np.sqrt(vg_y_obs**2 + vg_x_obs**2)/dt
track_direction = np.arctan2(vg_y_obs,vg_x_obs)
# Model settings (cutlength, cutv and constv repeat the values set next to the helper functions).
time_window = 51
cutlength = 45
cutv = 4.1667
constv = 34.7/3.6
# The modal interval must come from the resampled track (minDat): that is the
# series dt and track_speed were derived from, so it is what cutt has to bound.
sampling_interval = np.timedelta64(mode(np.diff(minDat['DT'])),'s').astype(int)
error_of_sampling_interval = 5
cutt = sampling_interval + error_of_sampling_interval
winwidth = time_window - 1

In [7]:
rrow = track_speed
drow = track_direction
# Timestamps of the resampled fixes, re-indexed from 0 so that position k in tp
# matches position k in rrow/drow (both are derived from minDat, not from dat).
tp = minDat['DT'].reset_index(drop=True)
# First and last usable window centres: half a window (in samples) from each end.
startpoint = math.floor((winwidth*(60/sampling_interval))/2)
endpoint = len(rrow) - startpoint

In [15]:
from tokenize import Double


def findWindow(dt,center,windwidthsec,min_samples=45):
    """Check whether a full, sufficiently sampled window exists around ``center``.

    Starting at ``center``, sampling intervals are accumulated forwards and,
    separately, backwards until the running total exceeds ``windwidthsec``.
    The window consists of the samples visited before the limit was crossed
    on each side (``center`` itself is counted once).

    Parameters
    ----------
    dt : sequence of numbers
        Intervals between consecutive fixes, in the same unit as ``windwidthsec``.
    center : int
        0-based index of the window centre in ``dt``.
    windwidthsec : float
        Half-window length; the notebook works in seconds.
    min_samples : int, optional
        Minimum number of samples the window must contain (default 45,
        equivalent to the former hard-coded ``> 44``).

    Returns
    -------
    int
        1 when both window edges lie inside the data and the window holds at
        least ``min_samples`` samples, otherwise 0.
    """
    elapsed = 0
    n_forward = None
    for qf in range(len(dt) - center):
        elapsed = elapsed + dt[center + qf]
        if elapsed > windwidthsec:
            n_forward = qf
            break
    elapsed = 0
    n_backward = None
    # range(center + 1) walks back to index 0 inclusive.
    for qb in range(center + 1):
        elapsed = elapsed + dt[center - qb]
        if elapsed > windwidthsec:
            n_backward = qb
            break
    if n_forward is None or n_backward is None:
        # The data run out before the window is complete on one side.
        return 0
    # n_forward and n_backward both include the centre sample.
    n_samples = n_forward + n_backward - 1
    return int(n_samples >= min_samples)

def trackVectors(id_hd,r,d,index):
    """Select the usable samples of a window and build starting values for the fit.

    Samples whose track speed exceeds the module-level ``cutv`` are kept.
    (The sampling-interval and direction filters described in the notes are
    not applied here because the function receives no interval data.)

    Parameters
    ----------
    id_hd : int
        Initial-heading step. The starting heading is the mean track
        direction plus ``(id_hd / 3) * (pi / 2)``.
    r : array-like of float
        Track speeds within the window.
    d : array-like of float
        Track directions (radians) within the window, aligned with ``r``.
    index : array-like
        Identifier of each sample, aligned with ``r``.

    Returns
    -------
    rr, dd, iindex : numpy.ndarray
        Speeds, directions and identifiers of the retained samples.
    par0 : numpy.ndarray
        Starting values ``[inita, mux, muy, wx, wy]``, in the parameter order
        read by ``Likelihoodww``.

    Raises
    ------
    ValueError
        If no sample exceeds ``cutv``.
    """
    r = np.asarray(r, dtype=float)
    d = np.asarray(d, dtype=float)
    index = np.asarray(index)

    # Keep element k of every vector (not the whole vector) where the speed passes.
    fast = r > cutv
    rr = r[fast]
    dd = d[fast]
    iindex = index[fast]
    if rr.size == 0:
        raise ValueError("no sample exceeds the speed threshold cutv")

    def _kappa_from_resultant(rbar):
        # Closed-form approximation to the inverse of I1(k)/I0(k); intended to
        # match the piecewise formula used by R's circular::A1inv.
        rbar = min(rbar, 1 - 1e-12)  # rbar == 1 would divide by zero below
        if rbar < 0.53:
            return 2*rbar + rbar**3 + 5*rbar**5/6
        if rbar < 0.85:
            return -0.4 + 1.39*rbar + 0.43/(1 - rbar)
        return 1/(rbar**3 - 4*rbar**2 + 3*rbar)

    inithd_first = (id_hd/3)*(np.pi/2)
    # Random Weibull-shape start, redrawn until it is at least 5.
    inita = 0
    while inita < 5:
        inita = np.abs(np.random.normal(12.5,5))
    # Mean direction and concentration of the retained track directions.
    meangd = np.arctan2(np.sum(np.sin(dd)),np.sum(np.cos(dd)))
    inithd = meangd + inithd_first
    initkappa = _kappa_from_resultant(float(np.mean(np.cos(dd - meangd))))
    initmux = initkappa*np.cos(inithd)
    initmuy = initkappa*np.sin(inithd)
    # Wind start = mean track vector minus the assumed air-velocity vector.
    initwx = np.mean(rr*np.cos(dd)) - constv*np.cos(inithd)
    initwy = np.mean(rr*np.sin(dd)) - constv*np.sin(inithd)

    par0 = np.array([inita, initmux, initmuy, initwx, initwy], dtype=float)
    return rr, dd, iindex, par0

In [16]:
# dt is in seconds, so the half-window (time_window/2 minutes) is converted to seconds.
findWindow(dt,center,(time_window/2)*60)

0

In [18]:
# Sanity check: modified Bessel function of the first kind, order 0, evaluated at 0.5.
sp.special.iv(0,.5)

1.0634833707413236

In [59]:
def a1(r):
    """Ratio ``I1(r) / I0(r)`` of modified Bessel functions of the first kind.

    The exponentially scaled functions (``ive``) are used: the scaling factor
    cancels in the ratio, and the result stays finite for large ``r`` where
    the unscaled ``iv`` overflows and the ratio would become nan.

    Parameters
    ----------
    r : float or array-like
        Argument of the Bessel functions.

    Returns
    -------
    float or ndarray
    """
    return sp.special.ive(1,r) / sp.special.ive(0,r)
# def a1inv(r):
#     return 

In [61]:
# Ratio I1(0.4) / I0(0.4) written out directly — the same expression that a1() wraps.
sp.special.iv(1,.4) / sp.special.iv(0,.4)

0.19610381221799555

In R, the function `A1inv` takes an input argument `r` and returns the value `k` such that

$A_1(k) = r$

where 

$A_1(\kappa) = \frac{I_1(\kappa)}{I_0(\kappa)}$

and $I_1$ and $I_0$ are the modified Bessel functions of the first kind of order one and zero, respectively.

However, Python does not have such a function, and so this estimate for `k` must be calculated through maximum likelihood.

In [60]:
# Evaluate the Bessel-ratio helper at 0.3.
a1(.3)

0.14833742694087523

In [58]:
# NOTE(review): a1inv has no live definition in the cells shown (only a commented-out stub),
# and this cell ran (In [58]) before the cell defining a1 (In [59]). The recorded TypeError
# is consistent with the objective returning None — define a1inv before re-running.
sp.optimize.minimize(a1inv,.4)

TypeError: '<' not supported between instances of 'NoneType' and 'float'