### Automation Script Overhaul 
For automating the parsing, transformation, and geographic file creation from .GPX
Work taken from the script by Isias (ler_gpx.py) and workflow by Simone. Further developments by Kyle & Isais. <br><br>
Data collection must be done with Locus Map 4.x or formatted similarly to GPX files exportd by the map for this script to work.

### Section 1
This section takes the raw GPX files and makes them ready for QGIS and statistical analysis.

In [1]:
#import dependiencies
import gpxpy
import pandas as pd
import geopandas as gpd
import numpy as np
from glob import glob

# see all GPX files from a saved directory
gpxFiles = glob('GPX_Exports/*.gpx')
print(gpxFiles)

['GPX_Exports/20220607_Kyle.gpx']


##### Choose the cell below for single file manipulation OR the cell two below for mass import and manipulation.

In [2]:
# Choose which file to use, select (using 0-n) the gpx file to manipulate / Escolha qual arquivo usar, selecione (usando 0-n) o arquivo gpx para manipular
gpxCurrent = gpxFiles[0]
gpxCurrent = open(gpxCurrent)
gpxCurrent = gpxpy.parse(gpxCurrent)
gpxCurrent = gpxCurrent.to_xml()

# Read the XML into a dataframe, remove unnecessary columns, and shift columns / Leia o XML em um dataframe, remova colunas desnecessárias e desloque colunas
df = pd.read_xml(gpxCurrent)
df.pop('desc')
df.pop('hdop')
df.pop('time')
df = df.drop(index=0)
shiftPos = df.pop('name')
df.insert(0, 'name', shiftPos)
df.head()

Unnamed: 0,name,lat,lon,ele
1,2022-06-07 13:46:24j22,-7.517397,-34.967164,89.0
2,2022-06-07 13:49:07m4,-7.517351,-34.967194,89.0
3,2022-06-07 13:50:24j13,-7.517413,-34.967114,89.0
4,2022-06-07 13:53:31m4,-7.517787,-34.967158,84.0
5,2022-06-07 13:54:10m4,-7.51774,-34.967198,84.0


In [None]:
# Merge all GPX files in directory / Mesclar todos os arquivos GPX no diretório


In [3]:
# Convert to geodataframe for geographic use / Converter para geodataframe para uso geográfico
import geopandas as gpd

gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.lon, df.lat)) # if Z is wanted for the points, add / se Z for desejado para os pontos, adicione ', df.ele' ex. (df.lon, df.lat, df.ele)
gdf = gdf.set_crs('EPSG:4326')
gdf.head()

Unnamed: 0,name,lat,lon,ele,geometry
1,2022-06-07 13:46:24j22,-7.517397,-34.967164,89.0,POINT (-34.96716 -7.51740)
2,2022-06-07 13:49:07m4,-7.517351,-34.967194,89.0,POINT (-34.96719 -7.51735)
3,2022-06-07 13:50:24j13,-7.517413,-34.967114,89.0,POINT (-34.96711 -7.51741)
4,2022-06-07 13:53:31m4,-7.517787,-34.967158,84.0,POINT (-34.96716 -7.51779)
5,2022-06-07 13:54:10m4,-7.51774,-34.967198,84.0,POINT (-34.96720 -7.51774)


In [4]:
# Ask for observer, group, climate conditions / Pergunte por observador, grupo, condições climáticas
observer = input('Observer/Observador? ')
group = input('Group/Grupo? (if both, mark 0) ') # or leave blank?
weather = input('Weather conditions/Condição do clima? ')
print('Observer/Observador: '+observer)
print('Group/Grupo: '+group)
print('Weather/Tempo: '+weather)

Observer/Observador: kyle
Group/Grupo: 0
Weather/Tempo: good


In [5]:
# If the above is correct, continue here. If not, re-run the cell and correct the information
# Se o acima estiver correto, continue aqui. Caso contrário, execute novamente a célula e corrija as informações

# Add the input to the dataframe
gdf.insert(loc=1, column='observer', value=observer, allow_duplicates=True)
gdf.insert(loc=1, column='group', value=group, allow_duplicates=True)
gdf.insert(loc=1, column='weather', value=weather, allow_duplicates=True)
gdf.head()

Unnamed: 0,name,weather,group,observer,lat,lon,ele,geometry
1,2022-06-07 13:46:24j22,good,0,kyle,-7.517397,-34.967164,89.0,POINT (-34.96716 -7.51740)
2,2022-06-07 13:49:07m4,good,0,kyle,-7.517351,-34.967194,89.0,POINT (-34.96719 -7.51735)
3,2022-06-07 13:50:24j13,good,0,kyle,-7.517413,-34.967114,89.0,POINT (-34.96711 -7.51741)
4,2022-06-07 13:53:31m4,good,0,kyle,-7.517787,-34.967158,84.0,POINT (-34.96716 -7.51779)
5,2022-06-07 13:54:10m4,good,0,kyle,-7.51774,-34.967198,84.0,POINT (-34.96720 -7.51774)


In [6]:
# Split 'name' into date, time, and observations / Dividir 'nome' em data, hora e observações

date = gdf['name'].str[:10]
gdf.insert(loc=0, column='date', value=date, allow_duplicates=True)

time = gdf['name'].str[11:19]
gdf.insert(loc=1, column='time', value=time, allow_duplicates=True)

obs = gdf['name'].str[19:]
gdf.insert(loc=2, column='observations', value=obs, allow_duplicates=True)

gdf.pop('name')

gdf.head()

Unnamed: 0,date,time,observations,weather,group,observer,lat,lon,ele,geometry
1,2022-06-07,13:46:24,j22,good,0,kyle,-7.517397,-34.967164,89.0,POINT (-34.96716 -7.51740)
2,2022-06-07,13:49:07,m4,good,0,kyle,-7.517351,-34.967194,89.0,POINT (-34.96719 -7.51735)
3,2022-06-07,13:50:24,j13,good,0,kyle,-7.517413,-34.967114,89.0,POINT (-34.96711 -7.51741)
4,2022-06-07,13:53:31,m4,good,0,kyle,-7.517787,-34.967158,84.0,POINT (-34.96716 -7.51779)
5,2022-06-07,13:54:10,m4,good,0,kyle,-7.51774,-34.967198,84.0,POINT (-34.96720 -7.51774)


In [7]:
# split observations into Age/Sex, Strata position, and behaviour
ageSex = ''
strata = ''
behaviour = ''
gdf.insert(loc=2, column='age/sex', value=ageSex, allow_duplicates=True)
gdf.insert(loc=3, column='strata', value=strata, allow_duplicates=True)
gdf.insert(loc=4, column='behaviour', value=behaviour, allow_duplicates=True)

In [13]:
for i, row in gdf.iterrows():
        # asSubstring = gdf['observations'].str[:2]
        if gdf['observations'].str[:2] == 'j2':
                ageSex = 'Juvenile 2'
                strata = gdf['observations'].str[2]
                behaviour = gdf['observations'].str[3:]
                gdf['age/sex']=ageSex
                gdf['strata']=strata
                gdf['behaviour']=behaviour
                

gdf.head()

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
gdf['age/sex'] = gdf['observations'].str.extract(['j2'])
gdf.head()

In [None]:
# Attempt to detect each scan, else create column for scan number / Tente detectar cada varredura, senão crie uma coluna para o número da varredura


In [None]:
# Export data frame to gpkg for use in QGIS / Exportar quadro de dados para gpkg para uso no QGIS
gdf.to_file('gdf.gpkg', driver="GPKG")
gdf.info()

### Section 2

In [None]:
gdf.plot()