In [1]:
import os
import re
import pandas as pd
import geopandas as gpd
from lxml import etree as ET
from shapely.geometry import LineString

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Convertir 'Flight Time' de milisegundos a minutos y segundos (MM:SS)
def convertir_tiempo(ms):
    try:
        ms = int(ms.strip())  # Convertir a entero y eliminar espacios extra
        segundos = ms // 1000  # Convertir milisegundos a segundos
        minutos = segundos // 60
        segundos = segundos % 60
        return f"{minutos}:{segundos:02d}"  # Formato MM:SS (ej. 9:35)
    except (ValueError, AttributeError):
        return None  # Si hay error, devolver None

def safe_float(value):
    """Convierte un valor a float, si es posible. Si es None o no numérico, devuelve None."""
    try:
        return float(value) if value is not None and value.strip() != "" else None
    except ValueError:
        return None

In [3]:
def convertir_kmls_a_shapefile(kml_folder, contenido):
    """
    Convierte varios archivos KML en un solo shapefile con proyección UTM zona 20S.
    Extrae valores de ExtendedData y los agrega como columnas.

    Parámetros:
    kml_folder (str): Carpeta que contiene los archivos KML.
    shapefile_path (str): Ruta donde se guardará el archivo shapefile combinado.
    """
    # Definir namespace de KML
    NAMESPACE = {"kml": "http://www.opengis.net/kml/2.2"}

    # Listas para almacenar los datos
    line_geometries = []
    filenames = []
    fechas = []
    horas = []
    ids = []
    
    aircraft_names = []
    flight_controller_ids = []
    pilot_names = []
    flight_times = []
    mode_selections = []
    heights = []
    route_spacings = []
    task_flight_speeds = []
    task_areas = []
    spray_amounts = []
    
    for filename in contenido:
        if filename.endswith('.kml'):
            kml_path = os.path.join(kml_folder, filename)
        
            try:
                with open(kml_path, 'r', encoding='utf-8') as file:
                    tree = ET.parse(file)
                    root = tree.getroot()
            except Exception as e:
                print(f"❌ Error al leer {filename}: {e}")
                continue
            
            # Extraer fecha, hora e ID del nombre del archivo
            match = re.search(r'T\d+_(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})_(R\d+)\.kml', filename)
            if not match:
                print(f"⚠ Advertencia: Nombre de archivo {filename} no coincide con el patrón esperado.")
                continue
            
            date = f"{match.group(1)}-{match.group(2)}-{match.group(3)}"
            time = f"{match.group(4)}:{match.group(5)}:{match.group(6)}"
            record_id = match.group(7)

            # Buscar Placemark en XML
            placemarks = root.findall('.//Placemark')
            if not placemarks:
                placemarks = root.findall('.//kml:Placemark', NAMESPACE)

            if not placemarks:
                print(f"⚠ Advertencia: No se encontraron <Placemark> en {filename}.")
                continue

            for placemark in placemarks:
                # Buscar LineString correctamente
                line_string = placemark.find('.//LineString')
                if line_string is None:
                    line_string = placemark.find('.//kml:LineString', NAMESPACE)
                
                if line_string is not None:
                    # Buscar Coordinates correctamente
                    coordinates = line_string.find('.//coordinates')
                    if coordinates is None:
                        coordinates = line_string.find('.//kml:coordinates', NAMESPACE)

                    if coordinates is not None and coordinates.text.strip():
                        coords = coordinates.text.strip().split()
                        points = [tuple(map(float, coord.split(',')[:2])) for coord in coords]  # Solo lat, lon
                        line_geometries.append(LineString(points))
                        fechas.append(date)
                        horas.append(time)
                        ids.append(record_id)
                        filenames.append(filename)

                        # Extraer datos de ExtendedData correctamente
                        extended_data = placemark.find('.//ExtendedData')
                        if extended_data is None:
                            extended_data = placemark.find('.//kml:ExtendedData', NAMESPACE)
                        
                        data_dict = {}
                        if extended_data is not None:
                            for data in extended_data.findall('.//Data') or extended_data.findall('.//kml:Data', NAMESPACE):
                                if data is None:
                                    continue  # Skip if there's no Data element

                                name = data.get('name')
                                value_element = data.find('.//kml:value', NAMESPACE)
                                if value_element is None:
                                    value_element = data.find('.//value')

                                value = value_element.text.strip() if value_element is not None and value_element.text else None
                                if name:
                                    data_dict[name] = value  # Store only if the name exists
                    
                        # Agregar valores de ExtendedData a las listas
                        aircraft_names.append(data_dict.get("Aircraft Name", None))
                        flight_controller_ids.append(data_dict.get("Flight Controller ID", None))
                        pilot_names.append(data_dict.get("Pilot Name", None))
                        flight_times.append(convertir_tiempo(data_dict.get("Flight Time", "0")))
                        mode_selections.append(data_dict.get("Mode Selection", None))
                        heights.append(data_dict.get("Height", None))
                        route_spacings.append(data_dict.get("Route Spacing", None))
                        
                        speed_ms = data_dict.get("Task Flight Speed")  # Obtener el valor si existe
                        if speed_ms is None or speed_ms.strip() == "":  # Manejar valores vacíos o None
                            speed_kmh = None
                        else:
                            speed_kmh = float(speed_ms) * 3.6 if speed_ms.replace('.', '', 1).isdigit() else None  # Convertir a km/h
                        task_flight_speeds.append(speed_kmh)
                        
                        task_areas.append(data_dict.get("Task Area", None))
                        spray_amounts.append(data_dict.get("Spray amount", None))

    if not line_geometries:
        print("❌ No se encontraron geometrías válidas en los archivos KML.")
        return

    # Crear un GeoDataFrame con todas las columnas
    gdf = gpd.GeoDataFrame({
        'filename': filenames,
        'fecha': fechas,
        'hora': horas,
        'id': ids,
        'aircraft_name': aircraft_names,
        'flight_controller_id': flight_controller_ids,
        'pilot_name': pilot_names,
        'flight_time': flight_times,
        'mode_selection': mode_selections,
        'height': heights,
        'route_spacing': route_spacings,
        'task_flight_speed': task_flight_speeds,
        'task_area': task_areas,
        'spray_amount': spray_amounts,
        'geometry': line_geometries
    }, crs='EPSG:4326')

    # Transformar a UTM zona 20S (EPSG:32720)
    gdf_utm = gdf.to_crs(epsg=32720)

    gdf_utm['spray_amount'] = gdf_utm['spray_amount'].astype(float) / 1000
    gdf_utm['height'] = gdf_utm['height'].astype(float)
    gdf_utm['route_spacing'] = gdf_utm['route_spacing'].astype(float)
    gdf_utm['task_area'] = gdf_utm['task_area'].astype(float)
    
    gdf_utm.rename(columns={
    'filename': 'file',
    'aircraft_name': 'drone',
    'flight_controller_id': 'ctrl_id',
    'pilot_name': 'pilot',
    'flight_time': 'fl_time',
    'mode_selection': 'mode',
    'height': 'height',
    'route_spacing': 'spacing',
    'task_flight_speed': 'fl_speed',
    'task_area': 'area',
    'spray_amount': 'spray' }, inplace=True)
    
    # Guardar como shapefile
    # gdf_utm.to_file(shapefile_path, driver='ESRI Shapefile')
    # print(f"✅ Archivo shapefile guardado en: {shapefile_path}")
    return gdf_utm

In [4]:
#leer base de datos de recorridos
path_recorridos = r'G:\Ingenio Azucarero Guabira S.A\UTEA - SEMANAL - EQUIPO AVIACION UTEA\Pulverizacion\2025\SHP\RECORRIDOS.shp'
gdf_recorridos = gpd.read_file(path_recorridos)
gdf_recorridos

Unnamed: 0,file,fecha,hora,id,drone,ctrl_id,pilot,fl_time,mode,height,spacing,fl_speed,area,spray,idd,geometry
0,T40_20250307131940_R5630753528.kml,2025-03-07,13:19:40,R5630753528,T40,1581F574B238900100GF,MARIO SANCHEZ,9:26,Auto,5.0,9.640,29.880001,3.350000,33.825,1,"LINESTRING (503877.967 8094579.033, 503877.974..."
1,T40_20250307133501_R6431655408.kml,2025-03-07,13:35:01,R6431655408,T40,1581F574B238900100GF,MARIO SANCHEZ,9:46,Auto,5.0,9.640,29.880001,3.548667,35.917,1,"LINESTRING (503876.695 8094578.178, 503876.695..."
2,T40_20250307134823_R6551790690.kml,2025-03-07,13:48:23,R6551790690,T40,1581F574B238900100GF,MARIO SANCHEZ,9:13,Auto,4.4,9.640,29.880001,3.148667,31.913,1,"LINESTRING (503881.318 8094576.323, 503881.287..."
3,T40_20250307140126_R6611858331.kml,2025-03-07,14:01:26,R6611858331,T40,1581F574B238900100GF,MARIO SANCHEZ,9:23,Auto,4.4,9.640,29.880001,3.354000,34.072,1,"LINESTRING (503880.632 8094579.144, 503880.631..."
4,T40_20250307141430_R6832106348.kml,2025-03-07,14:14:30,R6832106348,T40,1581F574B238900100GF,MARIO SANCHEZ,10:48,Auto,4.4,9.640,29.880001,3.429333,34.590,1,"LINESTRING (503880.530 8094579.078, 503880.530..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,T50_20250308063919_R5355679725.kml,2025-03-08,06:39:19,R5355679725,T50,1581F6BUB24630011R08,MARIO SANCHEZ,3:45,Auto,4.0,9.227,29.880001,0.468667,4.651,1,"LINESTRING (503897.642 8095156.368, 503792.268..."
100,T50_20250308073745_R6957483485.kml,2025-03-08,07:37:45,R6957483485,T50,1581F6BUB24630011R08,MARIO SANCHEZ,10:11,Auto,4.0,9.590,29.880001,3.615333,36.274,1,"LINESTRING (500237.518 8096338.480, 500237.518..."
101,T50_20250308075056_R7778407912.kml,2025-03-08,07:50:56,R7778407912,T50,1581F6BUB24630011R08,MARIO SANCHEZ,6:29,Auto,4.0,9.590,29.880001,1.677333,16.795,1,"LINESTRING (500237.351 8096339.196, 500237.351..."
102,T50_20250308075725_R7838475553.kml,2025-03-08,07:57:25,R7838475553,T50,1581F6BUB24630011R08,MARIO SANCHEZ,1:04,M/M+,0.0,0.000,0.000000,0.060667,2.181,1,"LINESTRING (500237.248 8096340.024, 500204.761..."


In [5]:
path_kml = r'G:\Ingenio Azucarero Guabira S.A\UTEA - SEMANAL - EQUIPO AVIACION UTEA\Pulverizacion\2025\KML_RECORRIDOS'
contenido = os.listdir(path_kml)
len(contenido)

136

In [6]:
# Filtrar la lista 'contenido' para excluir elementos que están en la columna 'nombre' del GeoDataFrame
contenido_filtrado = [item for item in contenido if item not in gdf_recorridos['file'].values]
len(contenido_filtrado)

32

In [8]:
gdf_nuevos_recorridos = convertir_kmls_a_shapefile(path_kml, contenido_filtrado)

In [9]:
gdf_nuevos_recorridos['idd'] = 0

In [10]:
len(gdf_nuevos_recorridos)

32

In [11]:
gdf_combined = gpd.GeoDataFrame(pd.concat([gdf_recorridos, gdf_nuevos_recorridos], ignore_index=True))

In [12]:
gdf_combined

Unnamed: 0,file,fecha,hora,id,drone,ctrl_id,pilot,fl_time,mode,height,spacing,fl_speed,area,spray,idd,geometry
0,T40_20250307131940_R5630753528.kml,2025-03-07,13:19:40,R5630753528,T40,1581F574B238900100GF,MARIO SANCHEZ,9:26,Auto,5.0,9.64,29.880001,3.350000,33.825,1,"LINESTRING (503877.967 8094579.033, 503877.974..."
1,T40_20250307133501_R6431655408.kml,2025-03-07,13:35:01,R6431655408,T40,1581F574B238900100GF,MARIO SANCHEZ,9:46,Auto,5.0,9.64,29.880001,3.548667,35.917,1,"LINESTRING (503876.695 8094578.178, 503876.695..."
2,T40_20250307134823_R6551790690.kml,2025-03-07,13:48:23,R6551790690,T40,1581F574B238900100GF,MARIO SANCHEZ,9:13,Auto,4.4,9.64,29.880001,3.148667,31.913,1,"LINESTRING (503881.318 8094576.323, 503881.287..."
3,T40_20250307140126_R6611858331.kml,2025-03-07,14:01:26,R6611858331,T40,1581F574B238900100GF,MARIO SANCHEZ,9:23,Auto,4.4,9.64,29.880001,3.354000,34.072,1,"LINESTRING (503880.632 8094579.144, 503880.631..."
4,T40_20250307141430_R6832106348.kml,2025-03-07,14:14:30,R6832106348,T40,1581F574B238900100GF,MARIO SANCHEZ,10:48,Auto,4.4,9.64,29.880001,3.429333,34.590,1,"LINESTRING (503880.530 8094579.078, 503880.530..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,T50_20250311154718_R5454671886.kml,2025-03-11,15:47:18,R5454671886,T50,1581F6BUB24630011R08,MARIO SANCHEZ,9:00,Auto,4.0,9.50,29.880001,3.051333,30.599,0,"LINESTRING (507742.047 8096325.129, 507742.046..."
132,T50_20250311155920_R5694942450.kml,2025-03-11,15:59:20,R5694942450,T50,1581F6BUB24630011R08,MARIO SANCHEZ,4:37,Auto,4.0,9.50,29.880001,1.091333,10.982,0,"LINESTRING (507742.011 8096327.591, 507742.011..."
133,T50_20250311160357_R5714964997.kml,2025-03-11,16:03:57,R5714964997,T50,1581F6BUB24630011R08,MARIO SANCHEZ,1:03,Auto,4.0,9.50,29.880001,0.076000,0.752,0,"LINESTRING (507772.682 8095365.584, 507772.625..."
134,T50_20250311160500_R5755010091.kml,2025-03-11,16:05:00,R5755010091,T50,1581F6BUB24630011R08,MARIO SANCHEZ,1:33,Auto,4.0,9.50,29.880001,0.689333,6.915,0,"LINESTRING (507781.665 8095367.549, 507781.667..."


In [13]:
gdf_combined.dtypes

file          object
fecha         object
hora          object
id            object
drone         object
ctrl_id       object
pilot         object
fl_time       object
mode          object
height       float64
spacing      float64
fl_speed     float64
area         float64
spray        float64
idd            int64
geometry    geometry
dtype: object

In [14]:
gdf_combined.to_file(path_recorridos, driver='ESRI Shapefile')

In [15]:
# Directorio donde están los archivos KML
path_kml = r'G:\Ingenio Azucarero Guabira S.A\UTEA - SEMANAL - EQUIPO AVIACION UTEA\Pulverizacion\2025\KML_RECORRIDOS'
contenido = os.listdir(path_kml)
len(contenido)

# Ejemplo de uso
kml_folder = path_kml
shapefile_path = f'{path_kml}/res.shp'
convertir_kmls_a_shapefile(kml_folder, shapefile_path)

❌ No se encontraron geometrías válidas en los archivos KML.


In [17]:
kml_folder

'G:\\Ingenio Azucarero Guabira S.A\\UTEA - SEMANAL - EQUIPO AVIACION UTEA\\Pulverizacion\\2025\\KML_RECORRIDOS'