In [None]:
import pandas as pd
import geopandas as gpd
import tempfile
import zipfile
import os
from shapely.geometry import LineString, Point, Polygon
import fiona

# Enable KML driver for reading and writing.
# Marks 'KML' as read/write ('rw') in Fiona's driver table; the 'kml' output
# option in the save step writes with driver='KML'.
fiona.supported_drivers['KML'] = 'rw'

In [None]:
# Determine geometry type based on sample coordinates
def determine_geometry_type(coords):
    """Classify a coordinate sequence by its length and whether it is closed.

    Args:
        coords: Sequence of coordinate tuples.

    Returns:
        "Point" for exactly one coordinate, "Line" for two or more
        coordinates whose first and last differ, "Polygon" for four or more
        coordinates whose first and last are equal, and "Unknown" otherwise
        (empty input, or a closed sequence of only two or three coordinates).
    """
    count = len(coords)
    if count == 1:
        return "Point"
    if count < 2:
        # Empty input: nothing to classify.
        return "Unknown"

    is_closed = coords[0] == coords[-1]
    if not is_closed:
        return "Line"
    # A closed ring needs at least four coordinates to count as a polygon.
    return "Polygon" if count >= 4 else "Unknown"

# Parse and validate coordinates
def parse_and_validate_coordinates(coord_string):
    """Parse a ';'-separated coordinate string into tuples and classify it.

    Each ';'-separated entry is split on whitespace. Entries with fewer than
    two tokens are skipped; otherwise the first two tokens are converted to
    floats and stored in swapped order, i.e. "a b ..." becomes (b, a)
    (presumably "lat lon" -> (lon, lat); confirm against the data source).

    Args:
        coord_string: Raw cell value from the GPS column.

    Returns:
        A ``(coords, geometry_type)`` tuple. ``geometry_type`` comes from
        ``determine_geometry_type``. Non-string input, or any token that
        cannot be converted to float, yields ``([], "Invalid")``.
    """
    if not isinstance(coord_string, str):
        return [], "Invalid"

    coords = []
    try:
        for entry in coord_string.split(';'):
            fields = entry.split()
            if len(fields) < 2:
                continue
            first, second = float(fields[0]), float(fields[1])
            # Swap the pair order when storing.
            coords.append((second, first))
        geometry_type = determine_geometry_type(coords)
    except ValueError:
        return [], "Invalid"
    return coords, geometry_type

# Convert data to GeoDataFrame
def convert_to_gdf(df, gps_col, transformation, geometry_type, selected_columns):
    """Build a GeoDataFrame (EPSG:4326) from a frame with a coordinate-string column.

    Args:
        df: Source DataFrame. It is copied, so the caller's frame is not modified.
        gps_col: Name of the column holding the coordinate strings.
        transformation: "line" or "polygon"; only consulted when
            ``geometry_type`` is "Polygon", to choose between a LineString
            and a Polygon.
        geometry_type: "Point", "Line" or "Polygon", applied to every row.
        selected_columns: Attribute columns to keep alongside the geometry.

    Returns:
        A GeoDataFrame with ``selected_columns`` plus a ``geometry`` column.
        Rows whose coordinates cannot form the requested geometry (no
        parseable coordinates, or too few of them) get a missing (None)
        geometry instead of raising.
    """
    # Work on a copy: the caller often passes a filtered slice, and writing
    # columns into it would mutate their data / emit SettingWithCopyWarning.
    df = df.copy()
    selected_columns = list(selected_columns)

    # Convert datetime columns to strings
    for col in selected_columns:
        if pd.api.types.is_datetime64_any_dtype(df[col]):
            df[col] = df[col].astype(str)

    def create_geometry(coords):
        # geometry_type was decided from a sample row, so individual rows may
        # not have enough coordinates for it; those rows get no geometry.
        if not coords:
            return None
        if geometry_type == "Point":
            return Point(coords[0])
        if geometry_type == "Line" or (geometry_type == "Polygon" and transformation == "line"):
            return LineString(coords) if len(coords) >= 2 else None
        if geometry_type == "Polygon" and transformation == "polygon":
            return Polygon(coords) if len(coords) >= 3 else None
        return None

    # Create geometry column based on coordinates
    df['geometry'] = df[gps_col].apply(
        lambda coord_string: create_geometry(parse_and_validate_coordinates(coord_string)[0])
    )

    # Return GeoDataFrame with selected columns, CRS set to EPSG:4326
    gdf = gpd.GeoDataFrame(
        df[selected_columns + ['geometry']],
        geometry='geometry',
        crs="EPSG:4326",
    )

    return gdf

In [None]:
# User Inputs
# Edit the values in this cell, then run the following cells in order.
file_path = 'path/to/your/file.xlsx'  # Input file; the load step reads .xlsx or .csv based on the extension
sheet_name = 'Sheet1'  # Excel sheet to read; also used as a prefix of the output file name
delimiter = ','  # Field separator, used only when the input is a CSV
gps_column = 'GPS'  # Column holding the ';'-separated coordinate strings
transformation = 'line'  # 'line' or 'polygon': how closed-ring (polygon-type) coordinates are converted
selected_columns = ['GPS', 'OtherColumn1', 'OtherColumn2']  # Attribute columns to keep in the output
output_format = 'geojson'  # Change to desired output format: 'shapefile', 'kml', 'gpkg', 'geoparquet', 'geojson'

In [None]:
# Load Data
# Choose the reader from the (case-insensitive) file extension. An unsupported
# extension fails here with a clear message instead of leaving `df` undefined
# and surfacing as a NameError in a later cell.
file_extension = os.path.splitext(file_path)[1].lstrip('.').lower()
if file_extension == 'xlsx':
    df = pd.read_excel(file_path, sheet_name=sheet_name)
elif file_extension == 'csv':
    df = pd.read_csv(file_path, delimiter=delimiter)
else:
    raise ValueError(
        f"Unsupported file type '.{file_extension}' for {file_path!r}; expected .xlsx or .csv."
    )

In [None]:
# Validate GPS Column
# Sample the first NON-null value: the frame has not been filtered yet, so a
# blank first row must not cause an otherwise valid column to be rejected.
non_null_gps = df[gps_column].dropna()
if non_null_gps.empty:
    raise ValueError("No valid GPS data found.")

sample_coords, geometry_type = parse_and_validate_coordinates(non_null_gps.iloc[0])
# "Unknown" (e.g. no parseable coordinate pairs) would make every geometry
# null during conversion, so it is rejected together with "Invalid".
if geometry_type in ("Invalid", "Unknown"):
    raise ValueError("The selected column does not contain valid GPS data.")

In [None]:
# Filter Data
# Keep only rows that have a GPS value. The explicit copy makes df_filtered an
# independent frame, so the columns written to it during conversion neither
# touch `df` nor trigger SettingWithCopyWarning.
df_filtered = df[df[gps_column].notnull()].copy()
if df_filtered.empty:
    raise ValueError("No valid GPS data found.")

In [None]:
# Convert to GeoDataFrame
# Every row is converted using the geometry type detected from the sample row.
gdf = convert_to_gdf(
    df=df_filtered,
    gps_col=gps_column,
    transformation=transformation,
    geometry_type=geometry_type,
    selected_columns=selected_columns,
)

In [None]:
# Save Output
# Write the result into a persistent "output" folder under the current working
# directory. Writing the final file inside a TemporaryDirectory would delete
# it as soon as the `with` block exits, leaving the printed path pointing at
# nothing.
output_dir = os.path.abspath("output")
os.makedirs(output_dir, exist_ok=True)
base_name = f"{sheet_name if sheet_name else 'data'}_{gps_column}_{transformation}"
output_base = os.path.join(output_dir, base_name)

if output_format == "shapefile":
    output_file = f"{output_base}.zip"
    # A shapefile is several sidecar files; stage them in a temp directory and
    # keep only the zip archive in the output folder.
    with tempfile.TemporaryDirectory() as tmpdirname:
        shp_base = os.path.join(tmpdirname, base_name)
        gdf.to_file(f"{shp_base}.shp")
        with zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for ext in ['shp', 'shx', 'dbf', 'prj', 'cpg']:
                file = f"{shp_base}.{ext}"
                # Not every sidecar file is guaranteed to be written.
                if os.path.exists(file):
                    zipf.write(file, os.path.basename(file))
elif output_format == "kml":
    output_file = f"{output_base}.kml"
    gdf.to_file(output_file, driver='KML')
elif output_format == "gpkg":
    output_file = f"{output_base}.gpkg"
    gdf.to_file(output_file, driver='GPKG')
elif output_format == "geoparquet":
    output_file = f"{output_base}.parquet"
    gdf.to_parquet(output_file)
elif output_format == "geojson":
    output_file = f"{output_base}.geojson"
    gdf.to_file(output_file, driver='GeoJSON')
else:
    # Fail loudly rather than reporting success with no file written.
    raise ValueError(
        f"Unsupported output format {output_format!r}; expected one of "
        "'shapefile', 'kml', 'gpkg', 'geoparquet', 'geojson'."
    )

print("File created successfully! 🎉")
print(f"Download the file from: {output_file}")