### Automation Script Overhaul 
For automating the parsing, transformation, and geographic file creation from .GPX<br><br>
Work taken from the script by Isias (ler_gpx.py) and workflow by Simone. Further developments by Kyle & Isais. <br><br>
Data collection must be done with Locus Map 4.x, or the GPX files must be formatted like those exported by Locus Map 4, for this script to work.

### Section 1
This section takes the raw GPX files and makes them ready for QGIS and statistical analysis.

In [None]:
import os
from datetime import datetime, timedelta
from io import StringIO
from os import mkdir

import geopandas as gpd
import gpxpy
import pandas as pd

# Map every '.gpx' file in the current working directory to a label of the
# form 'file_<filename>'. The keys are the file names that the daily-data
# loop iterates over.
gpxDict = {
    entry: 'file_' + entry
    for entry in os.listdir()
    if entry.endswith('.gpx')
}
gpxDict

##### Daily Data
This next for loop / cell will create files with the data sorted by day and must be run before creating data files by scan.

In [None]:
# merged = pd.DataFrame()

# Scan timing. Each scan lasts scanMins minutes and a new scan starts every
# 2 * scanMins minutes, so scan k (counting from 1) covers the closed interval
#   [start + (k - 1) * 2 * scanMins, start + (k - 1) * 2 * scanMins + scanMins]
# where `start` is the time of the first point of the day.
scanMins = 20
maxScans = 16  # points that fall after the last scan window get 'N/A'
scanLength = timedelta(minutes=scanMins)
scanCycle = timedelta(minutes=scanMins * 2)

# Directory that receives one csv file per day.
savePath = './csvDayFiles'

for i in gpxDict:

    # Parse the GPX file, re-serialise it with gpxpy, and load the XML into a
    # dataframe. The context manager guarantees the file handle is closed.
    # NOTE(review): the file is read with the platform default encoding;
    # confirm whether encoding='utf-8' should be passed for Locus Map exports.
    with open(i) as gpxFile:
        gpxCurrent = gpxpy.parse(gpxFile)
    # Passing a literal XML string to read_xml is deprecated (pandas >= 2.1),
    # so the text is wrapped in a file-like StringIO.
    df = pd.read_xml(StringIO(gpxCurrent.to_xml()))

    # Ask for observer, group, and weather conditions for this file.
    observer = input(f'Input for {i}: Observer/Observador? ')
    group = input(f'Input for {i}: Group/Grupo? (if both, mark 0) ')  # or leave blank?
    weather = input(f'Input for {i}: Weather conditions/Condição do clima? ')

    # Remove unnecessary columns, then drop the first row (index 0).
    # NOTE(review): row 0 is presumably the GPX <metadata> element rather than
    # a recorded point; confirm against a sample file.
    df = df.drop(columns=['desc', 'hdop', 'time']).drop(index=0)

    # Split 'name' into date, time, and observations, and add the user input
    # columns. Resulting leading column order:
    #   date, time, observations, weather, group, observer, <remaining columns>
    name = df.pop('name')
    df.insert(0, 'date', name.str[:10])
    df.insert(1, 'time', name.str[11:19])
    df.insert(2, 'observations', name.str[19:])
    df.insert(3, 'weather', weather)
    df.insert(4, 'group', group)
    df.insert(5, 'observer', observer)

    # TODO: solve observations issue

    # The first remaining row (index label 1) marks the start of scan 1.
    scanStart = datetime.strptime(df.at[1, 'time'], '%H:%M:%S')

    # Assign a scan ID to every point. divmod splits the time elapsed since
    # scanStart into the number of whole scan cycles and the offset into the
    # current cycle; the point belongs to a scan only when that offset falls
    # inside the scan itself rather than in the gap between two scans.
    scanNum = []
    for row in df['time']:
        elapsed = datetime.strptime(row, '%H:%M:%S') - scanStart
        cycleIndex, intoCycle = divmod(elapsed, scanCycle)
        if 0 <= cycleIndex < maxScans and intoCycle <= scanLength:
            scanNum.append(str(cycleIndex + 1))
        else:
            # If no scan window fits, apply N/A.
            scanNum.append('N/A')

    # Place the scan IDs right after 'time'.
    df.insert(2, 'scan', scanNum)

    # Make geographic and set CRS.
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))
    gdf = gdf.set_crs('EPSG:4326')

    # Export gdf into gpkg, one layer per input file.
    gdf.to_file('daily_data.gpkg', driver="GPKG", layer=i)

    # Create the csv directory if needed and save the day as csv, named after
    # the GPX file without its extension.
    os.makedirs(savePath, exist_ok=True)
    gdf.to_csv(os.path.join(savePath, os.path.splitext(i)[0] + '.csv'))

In [None]:
# # split observations into Age/Sex, Strata position, and behaviour
# ageSex = ''
# strata = ''
# behaviour = ''
# gdf.insert(loc=2, column='age/sex', value=ageSex, allow_duplicates=True)
# gdf.insert(loc=3, column='strata', value=strata, allow_duplicates=True)
# gdf.insert(loc=4, column='behaviour', value=behaviour, allow_duplicates=True)

In [None]:
# TODO: if the observation ends with a number
#   df['side'] = df.id.str[-1]  # check last digit for number
# TODO: if it is a number, remove the number as strata


# Identify age/sex, strata, and behaviour from each observation code.
# Layout taken from the slicing used here: characters 0-1 are the age/sex
# code, character 2 is the strata, and the rest is the behaviour.
# The lookup is applied to the whole column at once, one result per row; a
# whole Series cannot be used as an `if` condition (pandas raises
# "The truth value of a Series is ambiguous").
# Rows whose code is not listed in ageSexCodes get NaN in all three columns.
ageSexCodes = {'j2': 'Juvenile 2'}

obsText = gdf['observations']
ageSex = obsText.str[:2].map(ageSexCodes)
isKnown = ageSex.notna()

gdf['age/sex'] = ageSex
gdf['strata'] = obsText.str[2].where(isKnown)
gdf['behaviour'] = obsText.str[3:].where(isKnown)


gdf.head()

##### Scan by scan data
This next cell will take the daily data (after manual cleanup preferably) and create new data files organized by date and scan number.

In [None]:
# export each scan as individual file in gpkg and csv

#### Section 2
Analysis of scans, analyze all of this for every individual scan

In [None]:
# Find centroid of each scan (collect geometries, find centroid)


In [None]:
# Distance of each point/animal to centroid


In [None]:
# points to polygons for area (ha) of group spread (convex hull)


In [None]:
# Distance between each centroid in temporal order


In [None]:
# Subgroups/cluster analysis, find clusters on each scan and distance from each sub-centroid to main group centroid


#### Export Data

In [None]:
# Write the GeoDataFrame to a GeoPackage layer for use in QGIS.
exportPath = 'gdf.gpkg'
exportLayer = 'gdfExport'
gdf.to_file(exportPath, layer=exportLayer, driver='GPKG')

# Print a column / dtype summary of the exported frame.
gdf.info()

In [None]:
# Quick visual check: draw the geometries in gdf with the default plot settings.
gdf.plot()