### Automation Script Overhaul 
Automates the parsing, transformation, and creation of geographic files from .GPX files.<br><br>
Based on the script by Isias (ler_gpx.py) and the workflow by Simone. Further development by Kyle & Isais. <br><br>
Data collection must be done with Locus Map 4.x, or the files must be formatted like the GPX files exported by Locus Map 4, for this script to work.

### Section 1
This section takes the raw GPX files and makes them ready for QGIS and statistical analysis.

In [1]:
import io
import os
from glob import glob

import geopandas as gpd
import gpxpy
import pandas as pd

In [3]:
# Map every GPX file in the working directory to a 'file_<name>' label.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, and anything that iterates gpxDict should see the files in the same
# order on every run. The suffix check ignores case so '.GPX' is accepted too.
gpxDict = {
    file: 'file_' + file
    for file in sorted(os.listdir())
    if file.lower().endswith('.gpx')
}
gpxDict

{'20220607_Kyle.gpx': 'file_20220607_Kyle.gpx',
 '20220610_Kyle.gpx': 'file_20220610_Kyle.gpx'}

In [None]:
# gpxList = list(gpxDict)
# gpxList

In [8]:
# Read every GPX file listed in gpxDict into a single table of waypoints.
LEAD_COLUMNS = ['name', 'lat', 'lon', 'ele']  # always present, always first
frames = []  # one DataFrame per GPX file; concatenated once after the loop

for gpx_path in gpxDict:
    # The context manager closes the file as soon as gpxpy has parsed it.
    # NOTE(review): assumes the GPX exports are UTF-8 encoded -- confirm.
    with open(gpx_path, encoding='utf-8') as gpx_file:
        gpx_xml = gpxpy.parse(gpx_file).to_xml()

    # Passing a literal XML string to read_xml is deprecated, so hand it a
    # file-like object instead.
    df = pd.read_xml(io.StringIO(gpx_xml))

    # Remove columns that are not needed downstream; tolerate files that do
    # not carry all of them.
    df = df.drop(columns=['desc', 'hdop', 'time'], errors='ignore')
    # The first row is discarded -- presumably it is the GPX metadata element
    # rather than a waypoint; confirm against an exported file.
    df = df.drop(index=0)
    frames.append(df)

# A single concat avoids re-copying the accumulated rows on every iteration
# and does not depend on how pandas treats an empty seed frame.
merged = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Put the four core columns first (created empty if absent), followed by any
# additional columns the files contained.
extra_columns = [col for col in merged.columns if col not in LEAD_COLUMNS]
merged = merged.reindex(columns=LEAD_COLUMNS + extra_columns)

merged

Unnamed: 0,name,lat,lon,ele
0,2022-06-07 13:46:24j22,-7.517397,-34.967164,89.0
1,2022-06-07 13:49:07m4,-7.517351,-34.967194,89.0
2,2022-06-07 13:50:24j13,-7.517413,-34.967114,89.0
3,2022-06-07 13:53:31m4,-7.517787,-34.967158,84.0
4,2022-06-07 13:54:10m4,-7.51774,-34.967198,84.0
...,...,...,...,...
243,2022-06-10 12:42:59 m2,-7.520838,-34.965507,79.0
244,2022-06-10 12:43:05 ago,-7.52094,-34.965517,78.0
245,2022-06-10 12:43:21 f3,-7.520722,-34.96549,81.0
246,2022-06-10 12:43:56 ni3,-7.52096,-34.965416,78.0


In [11]:
# Convert the merged table to a GeoDataFrame for geographic use.
# Points are 2D (x = lon, y = lat); to include Z, pass merged.ele as a third
# argument, e.g. gpd.points_from_xy(merged.lon, merged.lat, merged.ele).
points = gpd.points_from_xy(merged.lon, merged.lat)
gdf = gpd.GeoDataFrame(merged, geometry=points, crs='EPSG:4326')
gdf.head()

Unnamed: 0,name,lat,lon,ele,geometry
0,2022-06-07 13:46:24j22,-7.517397,-34.967164,89.0,POINT (-34.96716 -7.51740)
1,2022-06-07 13:49:07m4,-7.517351,-34.967194,89.0,POINT (-34.96719 -7.51735)
2,2022-06-07 13:50:24j13,-7.517413,-34.967114,89.0,POINT (-34.96711 -7.51741)
3,2022-06-07 13:53:31m4,-7.517787,-34.967158,84.0,POINT (-34.96716 -7.51779)
4,2022-06-07 13:54:10m4,-7.51774,-34.967198,84.0,POINT (-34.96720 -7.51774)


In [12]:
# Prompt for the observer, the group, and the weather conditions, then echo
# the answers back so they can be checked before they are used.
observer = input('Observer/Observador? ')
group = input('Group/Grupo? (if both, mark 0) ')  # open question: or leave blank?
weather = input('Weather conditions/Condição do clima? ')

for label, answer in (
    ('Observer/Observador: ', observer),
    ('Group/Grupo: ', group),
    ('Weather/Tempo: ', weather),
):
    print(label + answer)

Observer/Observador: Kyle
Group/Grupo: 0
Weather/Tempo: good


 If the above is correct, continue here. If not, re-run the cell and correct the information<br><br>
 Se o acima estiver correto, continue aqui. Caso contrário, execute novamente a célula e corrija as informações

In [13]:
# Add the answers from the prompts to the dataframe, directly after the first
# column, in the order weather, group, observer.
# Re-running this cell overwrites the existing columns instead of inserting
# duplicates, so corrected answers can be applied safely.
session_columns = [('weather', weather), ('group', group), ('observer', observer)]
for position, (column, value) in enumerate(session_columns, start=1):
    if column in gdf.columns:
        gdf[column] = value
    else:
        gdf.insert(loc=position, column=column, value=value)
gdf.head()

Unnamed: 0,name,weather,group,observer,lat,lon,ele,geometry
0,2022-06-07 13:46:24j22,good,0,Kyle,-7.517397,-34.967164,89.0,POINT (-34.96716 -7.51740)
1,2022-06-07 13:49:07m4,good,0,Kyle,-7.517351,-34.967194,89.0,POINT (-34.96719 -7.51735)
2,2022-06-07 13:50:24j13,good,0,Kyle,-7.517413,-34.967114,89.0,POINT (-34.96711 -7.51741)
3,2022-06-07 13:53:31m4,good,0,Kyle,-7.517787,-34.967158,84.0,POINT (-34.96716 -7.51779)
4,2022-06-07 13:54:10m4,good,0,Kyle,-7.51774,-34.967198,84.0,POINT (-34.96720 -7.51774)


In [14]:
# Split 'name' into date, time, and observations.
# 'name' is laid out as 'YYYY-MM-DD HH:MM:SS<observation>'; the fixed-width
# slices below rely on that layout.
name = gdf['name']
date = name.str[:10]
time = name.str[11:19]
# Some files put a space between the time and the observation code
# ('12:42:59 m2') and others do not ('13:46:24j22'); strip it so the codes
# are comparable across files.
obs = name.str[19:].str.strip()

gdf.insert(loc=0, column='date', value=date)
gdf.insert(loc=1, column='time', value=time)
gdf.insert(loc=2, column='observations', value=obs)

# 'name' is fully represented by the three new columns.
gdf.pop('name')

gdf.head()

Unnamed: 0,date,time,observations,weather,group,observer,lat,lon,ele,geometry
0,2022-06-07,13:46:24,j22,good,0,Kyle,-7.517397,-34.967164,89.0,POINT (-34.96716 -7.51740)
1,2022-06-07,13:49:07,m4,good,0,Kyle,-7.517351,-34.967194,89.0,POINT (-34.96719 -7.51735)
2,2022-06-07,13:50:24,j13,good,0,Kyle,-7.517413,-34.967114,89.0,POINT (-34.96711 -7.51741)
3,2022-06-07,13:53:31,m4,good,0,Kyle,-7.517787,-34.967158,84.0,POINT (-34.96716 -7.51779)
4,2022-06-07,13:54:10,m4,good,0,Kyle,-7.51774,-34.967198,84.0,POINT (-34.96720 -7.51774)


In [None]:
# # split observations into Age/Sex, Strata position, and behaviour
# ageSex = ''
# strata = ''
# behaviour = ''
# gdf.insert(loc=2, column='age/sex', value=ageSex, allow_duplicates=True)
# gdf.insert(loc=3, column='strata', value=strata, allow_duplicates=True)
# gdf.insert(loc=4, column='behaviour', value=behaviour, allow_duplicates=True)

In [None]:
# Identify age/sex, strata, and behaviour from the observation code.
# Layout used here: an age/sex prefix, then one character for the strata,
# then the behaviour code.
# The comparison is done on the whole column with a boolean mask; using a
# Series comparison directly in an `if` raises ValueError.
AGE_SEX_CODES = {'j2': 'Juvenile 2'}  # TODO: add the remaining prefixes

codes = gdf['observations'].str.strip()

# Make sure the target columns exist so unmatched rows are left empty.
for column in ('age/sex', 'strata', 'behaviour'):
    if column not in gdf.columns:
        gdf[column] = None

for prefix, label in AGE_SEX_CODES.items():
    matches = codes.str.startswith(prefix, na=False)
    matched_codes = codes[matches]
    gdf.loc[matches, 'age/sex'] = label
    gdf.loc[matches, 'strata'] = matched_codes.str[len(prefix)]
    gdf.loc[matches, 'behaviour'] = matched_codes.str[len(prefix) + 1:]

gdf.head()

In [None]:
# Preview the first rows of gdf.
gdf.head()

In [None]:
# Attempt to detect each scan, else create column for scan number / Tente detectar cada varredura, senão crie uma coluna para o número da varredura


#### Section 2
Analysis of scans: every step below is run for each individual scan.

In [None]:
# Find centroid of each scan (collect geometries, find centroid)


In [None]:
# Distance of each point/animal to centroid


In [None]:
# points to polygons for area (ha) of group spread (convex hull)


In [None]:
# Distance between each centroid in temporal order


In [None]:
# Subgroups/cluster analysis: find clusters in each scan and the distance from each sub-centroid to the main group centroid


#### Export Data

In [None]:
# Write the GeoDataFrame to a GeoPackage for use in QGIS, then print a
# summary of its columns.
output_path = 'gdf.gpkg'
gdf.to_file(output_path, driver='GPKG')
gdf.info()

In [None]:
# Plot the geometries in gdf as a quick visual check.
gdf.plot()