In [7]:
import pandas as pd
import matplotlib.colors as mcolors

In [4]:
def load_data_antenna():
    df_antenna = pd.read_csv(r'C:\Users\Gamy\Documents\GitHub\dashboard-taipy-bat-CMR\data\df_clean.csv', engine = "pyarrow", dtype_backend = "pyarrow")
    df_antenna['DATE'] = df_antenna['DATE'].astype('datetime64[ns]')
    df_antenna['HEURE'] = pd.to_datetime(df_antenna['HEURE'], format = '%H:%M:%S').dt.time
    df_antenna['ANNEE'] = df_antenna['ANNEE'].astype('int')
    df_antenna['COMMUNE'] = df_antenna['COMMUNE'].astype('str')
    df_antenna['LIEU_DIT'] = df_antenna['LIEU_DIT'].astype('str')
    df_antenna['PRECISION_MILIEU'] = df_antenna['PRECISION_MILIEU'].astype('str')
    df_antenna['DEPARTEMENT'] = df_antenna['DEPARTEMENT'].astype('str')
    df_antenna['CODE_ESP'] = df_antenna['CODE_ESP'].astype('str')
    df_antenna['MASSE'] = df_antenna['MASSE'].astype('str')
    df_antenna['AB'] = df_antenna['AB'].astype('str')
    df_antenna['SEXE'] = df_antenna['SEXE'].astype('str')
    df_antenna['ACTION'] = df_antenna['ACTION'].astype('str')
    df_antenna['ID_PIT'] = df_antenna['ID_PIT'].astype('str')
    df_antenna['NUM_PIT'] = df_antenna['NUM_PIT'].astype('str')
    df_antenna['LONG_L93'] = df_antenna['LONG_L93'].astype('float')
    df_antenna['LAT_L93'] = df_antenna['LAT_L93'].astype('float')
    df_antenna['LONG_WGS'] = df_antenna['LONG_WGS'].astype('float')
    df_antenna['LAT_WGS'] = df_antenna['LAT_WGS'].astype('float')
    return df_antenna

df_antenna = load_data_antenna()

In [9]:
def create_transition_matrix(df, remove_self_loops = True, reduce_self_loops = False, reduction_factor = 0.5):
    # Tri par individu et date pour suivre les transitions
    df = df.sort_values(by = ['NUM_PIT', 'DATE'])

    # Créer une colonne pour le lieu précédent
    df['LIEU_PRECEDENT'] = df.groupby('NUM_PIT')['LIEU_DIT'].shift()

    # Filtrer pour obtenir seulement les transitions valides (non nulles)
    df_transitions = df.dropna(subset = ['LIEU_PRECEDENT'])

    # Compter les transitions de chaque lieu vers un autre
    transition_counts = df_transitions.groupby(['LIEU_PRECEDENT', 'LIEU_DIT']).size().reset_index(name = 'count')

    # Retirer les transitions où source == target si demandé
    if remove_self_loops:
        transition_counts = transition_counts[transition_counts['LIEU_PRECEDENT'] != transition_counts['LIEU_DIT']]

    # Réduire le poids des transitions de recontrôle (si demandé)
    if reduce_self_loops:
        transition_counts.loc[transition_counts['LIEU_PRECEDENT'] == transition_counts['LIEU_DIT'], 'count'] *= reduction_factor

    # Construire une matrice de transition
    lieux = sorted(set(df['LIEU_DIT'].unique()) | set(df['LIEU_PRECEDENT'].dropna().unique()))
    transition_matrix = pd.DataFrame(0, index = lieux, columns = lieux)

    for _, row in transition_counts.iterrows():
        transition_matrix.at[row['LIEU_PRECEDENT'], row['LIEU_DIT']] = row['count']

    return transition_matrix, lieux

tm, labels = create_transition_matrix(df_antenna)

In [15]:
def process_transition_matrix(transition_matrix, df, threshold=9):
    # Transformer la matrice en table de connexions
    transition_table = transition_matrix.stack().reset_index()
    transition_table.columns = ['source', 'target', 'count']

    # Assurer l'ordre alphabétique des paires (source, target) pour un regroupement correct
    transition_table['site_pair'] = transition_table.apply(
        lambda row: tuple(sorted([row['source'], row['target']])), axis=1
    )

    # Grouper par paire de sites et sommer les counts
    transition_table = transition_table.groupby('site_pair', as_index=False).agg(
        count=('count', 'sum')  # Somme des counts
    )

    # Séparer les paires de sites dans des colonnes distinctes
    transition_table[['source', 'target']] = pd.DataFrame(transition_table['site_pair'].tolist(), index=transition_table.index)

    # Filtrer pour ne garder que les connexions avec au moins un certain nombre d'occurrences
    transition_table = transition_table[transition_table['count'] > threshold]  # Valeur du seuil de significativité à sélectionner

    # Obtenir les limites pour normaliser les valeurs de count
    mean_count = transition_table['count'].mean()
    std_count = transition_table['count'].std()

    # Normaliser les valeurs de count entre 0 et 1
    transition_table['normalized_count'] = (transition_table['count'] - mean_count) / std_count  # Centrer-normer

    # Création d'une colormap personnalisée
    cmap = mcolors.LinearSegmentedColormap.from_list(
        "CustomGreenYellowOrangeRed",
        ["#808080", "#4B4B4B", "#FC4C02", "#FF8C00", "#FF0000"]
    )

    # Calculer les couleurs basées sur les counts standardisés
    transition_table['color'] = transition_table['normalized_count'].apply(lambda x: mcolors.to_hex(cmap(x)))

    # Fusionner les coordonnées du df dans transition_table pour source et target
    coords = df[['LIEU_DIT', 'LAT_WGS', 'LONG_WGS']].drop_duplicates()
    coords = coords.set_index('LIEU_DIT')

    transition_table = transition_table.merge(coords, left_on='source', right_index=True)
    transition_table = transition_table.merge(coords, left_on='target', right_index=True, suffixes=('_source', '_target'))
    
    return transition_table

process_transition_matrix(tm, df_antenna, threshold = 0)

Unnamed: 0,site_pair,count,source,target,normalized_count,color,LAT_WGS_source,LONG_WGS_source,LAT_WGS_target,LONG_WGS_target
261,"(6 rue de Chalon, Guibaud)",1,6 rue de Chalon,Guibaud,-0.138590,#808080,47.071370,-0.030382,46.677104,-0.584546
632,"(62 rue de Nantes, Faye-L'Abbesse - Bourg)",1,62 rue de Nantes,Faye-L'Abbesse - Bourg,-0.138590,#808080,47.319898,0.395899,46.829032,-0.353364
1267,"(755, rue du milieu, Portillo del Gesal)",1,"755, rue du milieu",Portillo del Gesal,-0.138590,#808080,50.871592,1.933035,42.742133,-2.430383
1460,"(Ancien tunnel de la Peytivie, Centrale hydroé...",4,Ancien tunnel de la Peytivie,Centrale hydroélectrique de Claredent,-0.110422,#808080,45.044418,1.217503,45.171655,1.621266
1487,"(Ancien tunnel de la Peytivie, Château de Haut...",6,Ancien tunnel de la Peytivie,Château de Hautefort,-0.091643,#808080,45.044418,1.217503,45.259722,1.145533
...,...,...,...,...,...,...,...,...,...,...
96423,"(Tunnel ferroviaire, le rocher cheffois)",1,Tunnel ferroviaire,le rocher cheffois,-0.138590,#808080,46.506124,-0.806658,46.424356,-1.432524
96423,"(Tunnel ferroviaire, le rocher cheffois)",1,Tunnel ferroviaire,le rocher cheffois,-0.138590,#808080,46.506124,-0.806658,46.424356,-1.432524
96503,"(Viaduc de Lessac, Viaduc de Rouchat)",1,Viaduc de Lessac,Viaduc de Rouchat,-0.138590,#808080,46.065296,0.682955,45.330270,1.381616
96507,"(Viaduc de Lessac, la Ribe - Mine Ribe Ouest)",1,Viaduc de Lessac,la Ribe - Mine Ribe Ouest,-0.138590,#808080,46.065296,0.682955,46.194263,2.527356
