### Automation Script Overhaul 
For automating the parsing, transformation, and geographic file creation from .GPX<br><br>
Work taken from the script by Isias (ler_gpx.py) and workflow by Simone. Further developments by Kyle & Isais. <br><br>
Data collection must be done with Locus Map 4.x, or the GPX files must be formatted like those exported by Locus Map 4, for this script to work.

### Section 1
This section takes the raw GPX files and makes them ready for QGIS and statistical analysis.

In [None]:
import gpxpy
import pandas as pd
import geopandas as gpd
import os
from os import mkdir
from datetime import datetime, timedelta

# Map every .gpx file found in the current working directory to a
# 'file_'-prefixed label, then display the mapping as the cell output.
gpxDict = {
    gpxName: 'file_' + gpxName
    for gpxName in os.listdir()
    if gpxName.endswith('.gpx')
}
gpxDict

##### Daily Data
This next for loop / cell will create files with the data sorted by day and must be run before creating data files by scan.

In [8]:
import gpxpy
import pandas as pd
import geopandas as gpd
import warnings
import os
from os import mkdir
from datetime import datetime, timedelta

# Silence FutureWarning messages for the rest of the session.
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Per-day metadata values start out empty; they are only filled in later if
# the user asks to enter them.
observer = group = weather = ''

# Ask once, up front, whether the user wants to enter observer, group and
# weather information for each day.
userInput = input("To add observer, group, and weather information for each day type 'yes'. Otherwise hit escape")

# Map every .gpx file found in the current working directory to a
# 'file_'-prefixed label; the keys drive the processing loop below.
gpxDict = {
    gpxName: 'file_' + gpxName
    for gpxName in os.listdir()
    if gpxName.endswith('.gpx')
}

# Process every .gpx file listed in gpxDict: parse it, split the waypoint name
# into date / time / observation, label each point with a scan number, decode
# the observation code, and export the result as a GeoPackage layer and a CSV.
from io import StringIO

# Age/sex codes recognised at the start of an observation. The two-character
# codes come first so that 'mf' and 'ff' are never mistaken for 'm' and 'f'.
_AGE_SEX_CODES = ('j1', 'j2', 'j3', 'ff', 'mf', 'sa', 'ni', 'f', 'm')

# Format of the time portion sliced out of each waypoint name.
_TIME_FORMAT = '%H:%M:%S'


def _scanId(firstTime, pointTime, scanMins=20, maxScans=16):
    """Return the scan number (as a string) that pointTime falls in, or ''.

    Scan k (1-based) covers the closed window that starts
    (k - 1) * 2 * scanMins minutes after firstTime and lasts scanMins minutes.
    Points before firstTime, between two windows, or after the last of the
    maxScans windows get an empty string.
    """
    elapsed = pointTime - firstTime
    if elapsed < timedelta(0):
        return ''
    # cycle = how many full (scan + gap) periods have passed;
    # offset = time elapsed inside the current period.
    cycle, offset = divmod(elapsed, timedelta(minutes=scanMins * 2))
    if cycle < maxScans and offset <= timedelta(minutes=scanMins):
        return str(cycle + 1)
    return ''


def _parseObservation(obs):
    """Split an observation string into (age/sex, strata, behaviour).

    The string is expected to start with one of _AGE_SEX_CODES, followed by a
    single strata character and then the behaviour code. When no behaviour
    follows the strata character, 'lof' is used. Observations starting with
    'ago' yield empty strata and behaviour; anything else is flagged with
    'ERROR' in all three fields.
    """
    for code in _AGE_SEX_CODES:
        if obs.startswith(code):
            strataPos = len(code)
            # The behaviour starts right after the strata character for every
            # code length (previously the one-character codes tested one
            # position too far and lost single-character behaviours).
            behaviour = obs[strataPos + 1:] or 'lof'
            return code, obs[strataPos:strataPos + 1], behaviour
    if obs.startswith('ago'):
        return 'ago', '', ''
    return 'ERROR', 'ERROR', 'ERROR'


# Only an explicit affirmative answer turns the per-file prompts on; before,
# any non-empty answer (even 'no') enabled them.
wantsDetails = userInput.strip().lower() in ('yes', 'y', 'sim', 's')

csvDir = 'csvDayFiles'

for i in gpxDict:

    # Name used for the GeoPackage layer and the CSV file (file name without
    # the '.gpx' suffix).
    layerName = i[:-4]

    # Parse the .gpx and read its XML into a dataframe. The file is closed as
    # soon as it has been parsed, and the XML text is wrapped in StringIO
    # because passing literal XML strings to read_xml is deprecated in pandas.
    with open(i) as gpxFile:
        gpxXml = gpxpy.parse(gpxFile).to_xml()
    df = pd.read_xml(StringIO(gpxXml))

    # Ask for observer, group and weather conditions for this file.
    if wantsDetails:
        observer = input('Input for '+i+': Observer/Observador? ')
        group = input('Input for '+i+': Group/Grupo? (if both, mark 0) ')
        weather = input('Input for '+i+': Weather conditions/Condição do clima? ')

    # Remove unnecessary columns (skipping any that are absent) and the row
    # labelled 0 (presumably a non-waypoint header row -- TODO confirm).
    df = df.drop(columns=['desc', 'time', 'hdop'], errors='ignore')
    df = df.drop(index=0)

    # The waypoint name carries date, time and the observation text at fixed
    # character positions.
    names = df.pop('name')
    dates = names.str[:10]
    times = names.str[11:19]
    observations = names.str[19:].str.strip()

    # Scan windows are measured from the time of the first remaining row.
    firstTime = datetime.strptime(times.iloc[0], _TIME_FORMAT)
    scanNum = [
        _scanId(firstTime, datetime.strptime(pointTime, _TIME_FORMAT))
        for pointTime in times
    ]

    # Decode every observation into its three components.
    parsed = [_parseObservation(obs) for obs in observations]
    scanAgeSex = [ageSex for ageSex, _, _ in parsed]
    scanStrata = [strata for _, strata, _ in parsed]
    scanBehaviour = [behaviour for _, _, behaviour in parsed]

    # Insert the user-supplied columns that have a value. Inserting each at
    # the front leaves them ordered weather, group, observer.
    for column, value in (('observer', observer), ('group', group), ('weather', weather)):
        if value:
            df.insert(loc=0, column=column, value=value, allow_duplicates=True)

    # Place the derived columns in front of everything else, in this order.
    leadingColumns = (
        ('date', dates),
        ('time', times),
        ('age/sex', scanAgeSex),
        ('behaviour', scanBehaviour),
        ('strata', scanStrata),
        ('scan', scanNum),
        ('obs', observations),
    )
    for position, (column, values) in enumerate(leadingColumns):
        df.insert(loc=position, column=column, value=values, allow_duplicates=True)

    # Make geographic and set CRS.
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))
    gdf = gdf.set_crs('EPSG:4326')

    # Export gdf into gpkg, one layer per input file.
    gdf.to_file('daily.gpkg', driver="GPKG", layer=layerName)

    # Make sure the save directory for csv files exists, then save to csv.
    os.makedirs(csvDir, exist_ok=True)
    gdf.to_csv(csvDir + '/' + layerName + '.csv')

In [None]:
# Preview the points labelled as scan '1' in the current gdf.
isScanOne = gdf['scan'].isin(['1'])
gdfs1 = gdf[isScanOne]
gdfs1

In [None]:
# Preview the points labelled as scan '20' in the current gdf.
# The selection is always assigned: when no point carries that label the
# result is an empty frame. Testing the filtered frame with `.any()` inside an
# `if` raises an ambiguous-truth-value error and could leave gdfs2 undefined.
gdfs2 = gdf[gdf['scan'].isin(['20'])]
gdfs2

##### Scan by scan data
This next cell will take the daily data (after manual cleanup preferably) and create new data files organized by date and scan number.

In [None]:
# export each scan as individual file in gpkg and csv
import pandas as pd
import geopandas as gpd
import os
from os import mkdir


# Read every daily CSV back in, one file at a time.
for file in os.listdir('csvDayFiles'):
    if not file.endswith('.csv'):
        continue

    df = pd.read_csv(f'csvDayFiles/{file}')
    # Discard the unnamed index column that read_csv picks up from the file.
    del df['Unnamed: 0']

    # TODO: separate the rows by scan into new dataframes, e.g. with
    #       `for i, g in df.groupby('scan'):`, and export each one.

#### Section 2
Analysis of scans, analyze all of this for every individual scan

For my data

In [None]:
import pandas as pd
import geopandas as gpd

# Load one daily CSV and turn it into a GeoDataFrame of points.
# TODO: replace the hard-coded file with a loop over a user-supplied folder,
#       e.g. path = input('Paste path to folder with csv\'s')
geocsv = pd.read_csv('csvDayFiles/20220612_Kyle.csv')

# Disabled: conversion of decimal commas for files that use
# 'Longitude' / 'Latitude' columns.
# geocsv['Longitude'] = geocsv['Longitude'].str.replace(',','.')
# geocsv['Latitude'] = geocsv['Latitude'].str.replace(',','.')

# Build point geometries from the lon / lat columns.
pointGeometry = gpd.points_from_xy(geocsv['lon'], geocsv['lat'])
geocsv = gpd.GeoDataFrame(geocsv, geometry=pointGeometry)

# No CRS is assigned here; the call below is left disabled.
# geocsv = geocsv.set_crs('EPSG:31985')
geocsv.head()

In [None]:
# Merge all points of the loaded file into a single geometry and take its
# centroid. NOTE: this is one centroid for the whole file, not one per scan.
mergedPoints = geocsv.dissolve()
centroid = mergedPoints.centroid
centroid.head()

In [None]:
# Distance of each point/animal to the group centroid.
# `distance` already works on the whole geometry column at once, so a single
# assignment is enough; looping over the rows only repeated the same work.
geocsv['distCentr'] = geocsv.distance(centroid.iloc[0])
geocsv.head()
# NOTE(review): distances come out in the units of the layer's coordinates --
# probably degrees here; reproject to a metric CRS to get metres. Confirm.

In [None]:
# Write the centroid to its own GeoPackage file and layer.
# TODO: create an export system that writes each scan as its own gpkg.
centroid.to_file(filename='centroid.gpkg', driver='GPKG', layer='centroid')

In [None]:
# Append the group centroid as a column on every row.
# The centroid series holds a single entry; inserting it directly aligns on
# the index, so only the row sharing that index label would receive a value.
# Repeating the point for each row fills the whole column.
groupCentroid = centroid.iloc[0]
geocsv.insert(
    loc=len(geocsv.columns),
    column='centroide grupo',
    value=gpd.GeoSeries(
        [groupCentroid] * len(geocsv),
        index=geocsv.index,
        crs=centroid.crs,
    ),
    allow_duplicates=True,
)
geocsv.head()

In [None]:
# points to polygons for area (ha) of group spread (convex hull)


In [None]:
# Distance between each centroid in temporal order


In [None]:
# Subgroups/cluster analysis: find clusters in each scan and the distance from each sub-centroid to the main group centroid


#### Export Data

In [None]:
# Export data frame to gpkg for use in QGIS / Exportar quadro de dados para gpkg para uso no QGIS
