# **Funciones**

In [29]:
#librerías
import gpxpy
import pandas as pd
import os
from geopy.distance import geodesic
from datetime import timedelta
import numpy as np
import plotly.graph_objects as go
import re



In [30]:

# Función que carga los datos desde el gpx a df
def read_gpx(file: str) -> pd.DataFrame:
    df = None
    points = []
    with open(file) as f:
        gpx = gpxpy.parse(f)
    for segment in gpx.tracks[0].segments:
        for p in segment.points:
            points.append({
                'time': p.time,
                'latitude': p.latitude,
                'Longitude': p. longitude,
                'elevation': p.elevation
            })
    df = pd.DataFrame.from_records(points)
    return df

In [31]:
#Funcion para almacenar df en csv

def save_to_csv(df, filename):
    if not filename.endswith('.csv'):
        filename += '.csv'
    
    try:
        df.to_csv(filename, index=False)
        print(f"Data saved to {os.getcwd()}/{filename}")
    except Exception as e:
        print(f"Error: {e}")


In [32]:
##Funcion para calcular distancia geodesica entre puntos y diferencia de tiempo 

def calcular_distancia_geodesica_time(df, lat_col='latitude', long_col='Longitude',time_col='time'):
    # Crear una nueva columna de latitud y longitud juntas
    df['lat_long'] = df[[lat_col, long_col]].apply(tuple, axis=1)

    # Función para calcular la distancia geodésica entre dos puntos
    def calcular_distancia(row):
        punto_a = (row[lat_col], row[long_col])
        punto_b = (df.loc[row.name + 1, lat_col:long_col]
                   if row.name < len(df) - 1 else punto_a)
        return geodesic(punto_a, punto_b).km

    def diferencia_tiempo(df,time_col='time',factor_correccion=1):
         # Convertir la columna de tiempo al formato de fecha y hora de pandas
        df[time_col] = pd.to_datetime(df[time_col])

        # Ordenar el DataFrame por la columna de tiempo en orden ascendente
        df.sort_values(time_col, inplace=True)

        # Calcular la diferencia de tiempo entre pares de filas consecutivas
        df['diferencia_tiempo_segundos'] = (df['time'] - df['time'].shift(1)).dt.total_seconds().fillna(0) * factor_correccion
        
        # Calcular la diferencia de tiempo en horas
        df['diferencia_tiempo_horas'] = df['diferencia_tiempo_segundos'] / 3600

        df = df[df['distancia_geodesica_km'] <= 0.1]
        df = df[df['diferencia_tiempo_segundos'] <= 60]
        return df


    # Calcular la distancia geodésica entre pares de puntos consecutivos
    df['distancia_geodesica_km'] = df.apply(calcular_distancia, axis=1)

    # Eliminar la columna de latitud y longitud juntas
    df = df.drop('lat_long', axis=1)

    df = diferencia_tiempo(df)

    # Devolver el DataFrame con la columna adicional de distancia geodésica
    return df


In [33]:
##Funcion para calcular velocidad media e instantanea
def calcular_rapidez(df, dist_col='distancia_geodesica_km', time_col='diferencia_tiempo_segundos'):
    # Calcular la rapidez media en km/h
    df['rapidez_media_km_s'] = df[dist_col].abs() / df[time_col]
    df['rapidez_media_km_h'] = df[dist_col].abs() / df[time_col]*3600
    # Asignar un valor cero a la primera fila para evitar NaN o inf
    df.fillna(0, inplace=True)
    df.replace([np.inf, -np.inf], 0, inplace=True)
    
    df = df[df['rapidez_media_km_h'] <= 100]
    
    # Devolver el DataFrame con las columnas adicionales de velocidad
    return df



In [34]:
#Funcion para graficar
def plot_vel(df, cols, labels):
    for i in range(len(cols)):
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df['time'], y=df[cols[i]], mode='lines', name=f'{labels[i]}'))
        yaxis=re.search('\((.*?)\)', labels[i]).group(1)
        fig.update_layout(title=f'{labels[i]}', xaxis_title='Time', yaxis_title=yaxis)
        fig.show()

# **Ejecución**

In [35]:
module_path = os.path.dirname("__file__")
folder_path = os.path.join(module_path, "data_raw/")

df1 = read_gpx(folder_path+'recovery.01-Mar-2022-1533.gpx')
df2 = read_gpx(folder_path+'recovery.05-Mar-2022.1025.gpx')
df3 = read_gpx(folder_path+'recovery.25-May-2022-0907.gpx')
# Limpiando valor lejano
df3 = df3.drop(index=0).reset_index(drop=True)


In [36]:
datos_dist_time=calcular_distancia_geodesica_time(df2)
datos_dist_time

Unnamed: 0,time,latitude,Longitude,elevation,distancia_geodesica_km,diferencia_tiempo_segundos,diferencia_tiempo_horas
0,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,0.002160,0.0,0.000000
1,2022-03-04 00:04:35+00:00,6.180912,-75.577295,1588.165859,0.002155,1.0,0.000278
2,2022-03-04 00:04:43+00:00,6.180921,-75.577312,1591.435879,0.005286,8.0,0.002222
3,2022-03-04 00:04:46+00:00,6.180897,-75.577270,1587.116055,0.002682,3.0,0.000833
4,2022-03-04 00:04:48+00:00,6.180874,-75.577276,1586.772305,0.002999,2.0,0.000556
...,...,...,...,...,...,...,...
18588,2022-03-05 15:25:40+00:00,6.255059,-75.590578,1481.257603,0.002491,2.0,0.000556
18589,2022-03-05 15:25:42+00:00,6.255073,-75.590597,1480.845982,0.002383,2.0,0.000556
18590,2022-03-05 15:25:44+00:00,6.255086,-75.590614,1480.847569,0.002080,2.0,0.000556
18591,2022-03-05 15:25:46+00:00,6.255103,-75.590619,1480.186069,0.002090,2.0,0.000556


In [37]:
datos_proc=calcular_rapidez(datos_dist_time)
save_to_csv(datos_proc,'datos_finales')
datos_proc

Data saved to /home/d3im3r/Dropbox/Maestria/Ciencia_datos/ciencia-datos-final-DDS/datos_finales.csv


Unnamed: 0,time,latitude,Longitude,elevation,distancia_geodesica_km,diferencia_tiempo_segundos,diferencia_tiempo_horas,rapidez_media_km_s,rapidez_media_km_h
0,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,0.002160,0.0,0.000000,0.000000,0.000000
1,2022-03-04 00:04:35+00:00,6.180912,-75.577295,1588.165859,0.002155,1.0,0.000278,0.002155,7.757585
2,2022-03-04 00:04:43+00:00,6.180921,-75.577312,1591.435879,0.005286,8.0,0.002222,0.000661,2.378816
3,2022-03-04 00:04:46+00:00,6.180897,-75.577270,1587.116055,0.002682,3.0,0.000833,0.000894,3.218779
4,2022-03-04 00:04:48+00:00,6.180874,-75.577276,1586.772305,0.002999,2.0,0.000556,0.001499,5.397839
...,...,...,...,...,...,...,...,...,...
18588,2022-03-05 15:25:40+00:00,6.255059,-75.590578,1481.257603,0.002491,2.0,0.000556,0.001246,4.484345
18589,2022-03-05 15:25:42+00:00,6.255073,-75.590597,1480.845982,0.002383,2.0,0.000556,0.001192,4.289845
18590,2022-03-05 15:25:44+00:00,6.255086,-75.590614,1480.847569,0.002080,2.0,0.000556,0.001040,3.744690
18591,2022-03-05 15:25:46+00:00,6.255103,-75.590619,1480.186069,0.002090,2.0,0.000556,0.001045,3.762285


In [38]:
cols = ['rapidez_media_km_h', 'diferencia_tiempo_segundos','distancia_geodesica_km']
labels = ['Rapidez Media (km/h)', 'Delta de tiempo (s)','Distancia geodesica (km)']

plot_vel(datos_proc, cols, labels)