# In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np

import pyproj
from pyproj import Proj, transform
import reverse_geocoder as rg

from tqdm import tqdm

import warnings
warnings.simplefilter('ignore')


def add_lat_lon_columns(df):
    """
    Add WGS84 longitude/latitude columns for the start and end points.

    The 'xD', 'yD', 'xF' and 'yF' columns hold Lambert-93 (EPSG:2154)
    coordinates. They are first converted to floats (string values written
    with a decimal comma, e.g. '652381,5', are accepted) and then reprojected
    to EPSG:4326. The dataframe is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the columns 'xD', 'yD', 'xF', and 'yF'.

    Returns
    -------
    pandas.DataFrame
        The same dataframe with 'xD', 'yD', 'xF', 'yF' converted to numeric
        values and the new 'lonD', 'latD', 'lonF', and 'latF' columns added.
    """
    # always_xy=True fixes the axis order to (easting, northing) in and
    # (longitude, latitude) out, whatever axis order the CRS definitions declare.
    transformer = pyproj.Transformer.from_crs('EPSG:2154', 'EPSG:4326', always_xy=True)

    def _to_float(values):
        # Series.replace(',', '.') would only touch cells that are exactly ',',
        # so the decimal comma has to be replaced inside each string value.
        cleaned = values.map(
            lambda v: v.replace(',', '.') if isinstance(v, str) else v
        )
        return pd.to_numeric(cleaned)

    # 'D' columns describe the start point, 'F' columns the end point.
    for suffix in ('D', 'F'):
        x_col = f'x{suffix}'
        y_col = f'y{suffix}'
        df[x_col] = _to_float(df[x_col])
        df[y_col] = _to_float(df[y_col])

        lon, lat = transformer.transform(df[x_col].to_numpy(), df[y_col].to_numpy())
        df[f'lon{suffix}'] = lon
        df[f'lat{suffix}'] = lat

    return df

def read_shape_file(path: str):
    """
    Load a geospatial file with geopandas.

    Parameters
    ----------
    path : str
        Location of the file, passed unchanged to ``geopandas.read_file``.

    Returns
    -------
    The object returned by ``geopandas.read_file(path)``.
    """
    return gpd.read_file(path)

def fix_tmja(df):
    """
    Clean the TMJA dataframe and enrich it with location information.

    Steps, in order:
      1. convert 'longueur' to float (string values may use a decimal comma);
      2. divide 'ratio_PL' values above 40 by 10;
      3. add 'lonD', 'latD', 'lonF', 'latF' via ``add_lat_lon_columns``;
      4. reverse-geocode the start point ('latD', 'lonD') of every row to fill
         the 'region' and 'departement' columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with at least the columns 'longueur', 'ratio_PL', 'xD',
        'yD', 'xF' and 'yF'.

    Returns
    -------
    pandas.DataFrame
        The dataframe with the cleaned and added columns.
    """
    # Fixing a few columns
    df['longueur'] = df['longueur'].apply(
        lambda v: float(v.replace(',', '.')) if isinstance(v, str) else float(v)
    )
    # NOTE(review): values above 40 are presumably percentages recorded with
    # a missing decimal separator -- confirm against the data source.
    ratio = df['ratio_PL']
    df['ratio_PL'] = ratio.where(ratio <= 40, ratio / 10)

    # Add latitude and longitude
    df = add_lat_lon_columns(df)

    # Add region and department from latitude and longitude.
    # The coordinates must come from the dataframe being processed, not from
    # a notebook-level global; each pair is passed as (latitude, longitude).
    coordinates = list(zip(df['latD'], df['lonD']))
    results = rg.search(coordinates)
    df['region'] = [hit['admin1'] for hit in results]

    # Fragments removed from the 'admin2' label, applied in this order.
    # NOTE(review): 'la ' and "l'" are removed wherever they occur in the
    # name, not only as a leading article.
    fragments = (
        'Departement de ',
        'Departement du ',
        'Departement des ',
        "Departement d'",
        'la ',
        "l'",
    )
    departements = []
    for hit in results:
        name = hit['admin2']
        for fragment in fragments:
            name = name.replace(fragment, '')
        departements.append(name)
    df['departement'] = departements

    return df

def fix_stations(df):
    """
    Collapse doubled commas in the 'Coordinates' column.

    Every ',,' found in a 'Coordinates' string is replaced by a single ','.
    The dataframe is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with a 'Coordinates' column of strings.

    Returns
    -------
    pandas.DataFrame
        The same dataframe with the cleaned 'Coordinates' column.
    """
    def _collapse_double_comma(raw):
        return raw.replace(',,', ',')

    cleaned = df['Coordinates'].map(_collapse_double_comma)
    df['Coordinates'] = cleaned
    return df

def indicate_crs(shp_file: gpd.geodataframe.GeoDataFrame, epsg:str):
    """
    Declare the coordinate reference system of a GeoDataFrame.

    No coordinates are reprojected; only the CRS label is set, and an
    existing CRS is overridden.

    Parameters
    ----------
    shp_file : geopandas.GeoDataFrame
        The geodataframe whose CRS must be declared.
    epsg : str
        CRS to assign, e.g. 'epsg:2154'.

    Returns
    -------
    geopandas.GeoDataFrame
        A geodataframe carrying the requested CRS.
    """
    # set_crs is not in-place by default: it returns a new object, so its
    # result must be returned instead of the untouched input.
    return shp_file.set_crs(epsg, allow_override=True)

def convert_long_lat_to_easting_northing(df, initial_epsg='epsg:4326', target_epsg='epsg:2154'):
    """
    Build projected point geometries from the 'Coordinates' column.

    Each 'Coordinates' value is a string whose first comma-separated field is
    read as the latitude and whose second field is read as the longitude.
    The pairs are reprojected from ``initial_epsg`` to ``target_epsg`` and
    stored as the geometry of ``df``.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Dataframe with a 'Coordinates' column. Assumed to be a GeoDataFrame,
        since the result is assigned through ``df.geometry``.
    initial_epsg : str, default 'epsg:4326'
        CRS of the values stored in 'Coordinates' (WGS84 by default).
    target_epsg : str, default 'epsg:2154'
        CRS of the generated geometries (Lambert-93 by default).

    Returns
    -------
    The same dataframe with its geometry replaced.
    """
    # always_xy=True: input is (longitude, latitude), output is
    # (easting, northing), independent of the axis order the CRS declares.
    transformer = pyproj.Transformer.from_crs(initial_epsg, target_epsg, always_xy=True)

    fields = df['Coordinates'].apply(lambda raw: raw.split(','))
    latitudes = fields.apply(lambda parts: parts[0]).values.astype(float)
    longitudes = fields.apply(lambda parts: parts[1]).values.astype(float)

    eastings, northings = transformer.transform(longitudes, latitudes)

    df.geometry = gpd.points_from_xy(x=eastings, y=northings, crs=target_epsg)

    return df

def convert_str_geometry_to_geometry_geometry(df):
    """
    Parse the WKT strings of the 'geometry' column into geometry objects.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose 'geometry' column contains WKT text.

    Returns
    -------
    pandas.DataFrame
        The same dataframe, with 'geometry' replaced by the parsed GeoSeries.
    """
    wkt_column = df['geometry']
    parsed = gpd.GeoSeries.from_wkt(wkt_column)
    df['geometry'] = parsed
    return df

def new_coordinates_creation(approximate_nb_of_points, shapefile_tmja: gpd.geodataframe.GeoDataFrame):
    """
    Sample points along every line of the road network.

    The summed length of all geometries is divided by
    ``approximate_nb_of_points`` to obtain a target spacing. Each line then
    receives ``int(line.length / spacing)`` points placed at the normalised
    positions ``i / n`` for ``i`` in ``range(n)`` (so the end of the line is
    not included). Every line gets at least two points, at positions 0 and 0.5.

    Parameters
    ----------
    approximate_nb_of_points : int
        Rough total number of points wanted over the whole network.
    shapefile_tmja : geopandas.GeoDataFrame
        Geodataframe with line geometries and a 'route' column.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per sampled point with the columns 'easting', 'northing'
        (floats), 'route' and 'geometry', in the CRS of ``shapefile_tmja``.
    """
    total_distance = shapefile_tmja.geometry.length.sum()  # in CRS units
    distance_between_coordinates = total_distance / approximate_nb_of_points

    eastings = []
    northings = []
    routes = []

    rows = zip(shapefile_tmja.geometry, shapefile_tmja['route'])
    for line, route in tqdm(rows, total=len(shapefile_tmja)):
        # A non-positive (or NaN) spacing cannot be used to split a line;
        # fall back to the two-point minimum instead of failing.
        if distance_between_coordinates > 0:
            n_splits = int(line.length / distance_between_coordinates)
        else:
            n_splits = 0
        n_points = max(n_splits, 2)

        for i in range(n_points):
            point = line.interpolate(i / n_points, normalized=True)
            # Read the coordinates directly rather than parsing the WKT text.
            eastings.append(point.x)
            northings.append(point.y)
            routes.append(route)

    coordinates = pd.DataFrame(
        {'easting': eastings, 'northing': northings, 'route': routes}
    )
    # Use the CRS of the geodataframe received as argument, not a global.
    coordinates['geometry'] = gpd.points_from_xy(x=coordinates.easting,
                                                 y=coordinates.northing,
                                                 crs=shapefile_tmja.crs)

    shp_coordinates = gpd.GeoDataFrame(coordinates)

    return shp_coordinates

def compute_distance_to_closest_large_hub(df_stations:gpd.geodataframe.GeoDataFrame, df_hub_elargies:gpd.geodataframe.GeoDataFrame):
    """
    Find, for every station, the closest large hub and its distance.

    Parameters
    ----------
    df_stations : geopandas.GeoDataFrame
        Stations; modified in place.
    df_hub_elargies : geopandas.GeoDataFrame
        Large hubs, with an 'e1' column used as the hub identifier.

    Returns
    -------
    geopandas.GeoDataFrame
        ``df_stations`` with two new columns: 'closest_large_hub' (the 'e1'
        value of the nearest hub) and 'distance_to_closest_large_hub'
        (distance in the units of the geometries' CRS).
    """
    hub_geometries = df_hub_elargies.geometry
    large_hub_list = []
    distance_to_large_hub_list = []

    # Iterate over the geometries themselves: mixing index labels with
    # positional .iloc lookups breaks as soon as the index is not 0..n-1.
    for station_geometry in tqdm(df_stations.geometry):
        distances = hub_geometries.distance(station_geometry)
        # idxmin returns the label of the first minimum, i.e. the first hub
        # in case of ties.
        idx_hub = distances.idxmin()
        large_hub_list.append(df_hub_elargies.at[idx_hub, 'e1'])
        distance_to_large_hub_list.append(distances.min())

    df_stations['closest_large_hub'] = large_hub_list
    df_stations['distance_to_closest_large_hub'] = distance_to_large_hub_list

    return df_stations


def compute_distance_to_closest_dense_hub(df_stations:gpd.geodataframe.GeoDataFrame, df_hub_denses:gpd.geodataframe.GeoDataFrame):
    """
    Find, for every station, the closest dense hub and its distance.

    Parameters
    ----------
    df_stations : geopandas.GeoDataFrame
        Stations; modified in place.
    df_hub_denses : geopandas.GeoDataFrame
        Dense hubs, with an 'e1' column used as the hub identifier.

    Returns
    -------
    geopandas.GeoDataFrame
        ``df_stations`` with two new columns: 'closest_dense_hub' (the 'e1'
        value of the nearest hub) and 'distance_to_closest_dense_hub'
        (distance in the units of the geometries' CRS).
    """
    hub_geometries = df_hub_denses.geometry
    dense_hub_list = []
    distance_to_dense_hub_list = []

    # Iterate over the geometries themselves: mixing index labels with
    # positional .iloc lookups breaks as soon as the index is not 0..n-1.
    for station_geometry in tqdm(df_stations.geometry):
        distances = hub_geometries.distance(station_geometry)
        # idxmin returns the label of the first minimum, i.e. the first hub
        # in case of ties.
        idx_hub = distances.idxmin()
        dense_hub_list.append(df_hub_denses.at[idx_hub, 'e1'])
        distance_to_dense_hub_list.append(distances.min())

    df_stations['closest_dense_hub'] = dense_hub_list
    df_stations['distance_to_closest_dense_hub'] = distance_to_dense_hub_list

    return df_stations

def compute_distance_to_each_station(df):
    """
    Add one distance column per station.

    For every row, a column named 'distance_to_<URL>' is added, holding the
    distance from that row's geometry to the geometry of every row of ``df``.
    Rows sharing the same 'URL' write to the same column, so the last one
    wins. The dataframe is modified in place.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Dataframe with a 'URL' column and a 'geometry' column.

    Returns
    -------
    The same dataframe with the added 'distance_to_<URL>' columns.
    """
    # Wrapping in a GeoSeries gives access to the documented, vectorised
    # GeoSeries.distance instead of calling distance on a single geometry
    # with a whole series as argument.
    geometries = gpd.GeoSeries(df.geometry)
    urls = list(df['URL'])

    # Iterate over values rather than index labels so that a non-unique
    # index cannot turn a single-row lookup into a multi-row one.
    for url, station_geometry in tqdm(zip(urls, list(geometries)), total=len(urls)):
        df[f'distance_to_{url}'] = geometries.distance(station_geometry)
    return df

