In [107]:
import pandas as pd
from datetime import datetime
import json
import numpy as np
from shapely.geometry import Point, Polygon


class Preprocessing():
    #This part is not general is only for this file
    def __init__(self,path="../new def traj/DataSealClassed_A_P_DateString.csv"):
        df=pd.read_csv(path)
        df.drop(columns="Unnamed: 0",inplace=True)
        df["ds_date"]=pd.to_datetime(df["ds_date"],format="%d/%m/%Y %H:%M")
        df["DE_DATE"]=pd.to_datetime(df["DE_DATE"],format="%d/%m/%Y %H:%M")
        df=df.sort_values(by=["ds_date","DE_DATE"])
        df["strat_heur"]=df["ds_date"].dt.strftime("%H:%M")
        df["end_heur"]=df["DE_DATE"].dt.strftime("%H:%M")
        
        self.df=df
        self.seal_members = df["SEAL"].unique()
        self.T=0

    #This function converts a time written as HH:mm into a decimal number of hours, e.g. 8:30 -> 8.5
    def convertTimeToNumber(self,time):
        heur=int(time.split(":")[0])
        minute=int(time.split(":")[1])
        return heur+minute/60
    #This function apply 'convertTimeToNumber' function to convert all column values in a df
    def convertHorsColumnToNumber(self):
        self.df["strat_heur"]=self.df["strat_heur"].apply(self.convertTimeToNumber)
        self.df["end_heur"]=self.df["end_heur"].apply(self.convertTimeToNumber)
    
    #This function maps each timestamp to the start of the 'T_minute'-minute interval
    # that contains it, e.g. T_minute=20 and t=18:15 => t=18:00
    def getFlooredTime_T(self,T_minute):
        self.T=T_minute
        self.df[f"ds_date_{self.T}_m"] = self.df["ds_date"].dt.floor(f'{T_minute}T')
        self.df[f"DE_DATE_{self.T}_m"] = self.df["DE_DATE"].dt.floor(f'{T_minute}T')+pd.Timedelta(minutes=T_minute)
    
    def unique_join(self,series):
        return ','.join(set(series))
    #This function groups the dataframe rows that share the same 'SEAL' identifier and the
    # same interval start time; for each group it keeps the maximum interval end time, the first
    # and last points of the movement, and gathers the activities of the aggregated rows in a set.
    def aggregate_data(self):
        df_sorted_copy=self.df.copy()
        grouped_df = df_sorted_copy.groupby(['SEAL',f'ds_date_{self.T}_m']).agg({
            f'DE_DATE_{self.T}_m': 'max' ,
            'start_lat': lambda x: (x.iloc[0]),  
            'LAT': lambda x: (x.iloc[-1]),
            'start_lon': lambda x: (x.iloc[0]),  
            'LON': lambda x: (x.iloc[-1]),
            'activities': self.unique_join,
            'strat_heur':lambda x: (x.iloc[0]),
            'end_heur':lambda x: (x.iloc[-1]),
            'ds_date':lambda x: (x.iloc[0]),
            'DE_DATE':lambda x: (x.iloc[-1])
        }).reset_index()

        return grouped_df
    
    #This function determines whether each point lies in the ocean ("M"), on land ("T") or outside both ("Out")
    def location_MT_prepro(self,T_minute):
        """Aggregate the data per interval and tag each row with its zone.

        Steps, in order:

        1. read the ocean and land outlines from ``oceanv1.geojson`` and
           ``terrev1.geojson`` (first ring of the first feature), reverse
           every vertex, and dump them to ``oceanCoord.json`` /
           ``terreCoord.json`` before reading them back;
        2. convert the hour columns, floor the dates to *T_minute* intervals
           and replace ``self.df`` by its aggregated form;
        3. add ``start_MT`` and ``end_MT``: ``"T"`` when the point is inside
           the land polygon, ``"M"`` when inside the ocean polygon,
           ``"Out"`` otherwise (land is tested first).

        Parameters
        ----------
        T_minute : int
            Interval length in minutes, forwarded to ``getFlooredTime_T``.
        """
        def ring_from_geojson(geojson_path, column, dump_path):
            # Each vertex is reversed in place -- presumably from GeoJSON's
            # (lon, lat) to (lat, lon); confirm against the source files.
            ring = pd.read_json(geojson_path)["features"][0]["geometry"]["coordinates"][0]
            for vertex in ring:
                vertex.reverse()
            with open(dump_path, "w") as handle:
                json.dump({column: ring}, handle, indent=4)
            return list(pd.read_json(dump_path)[column])

        ocean_ring = ring_from_geojson("oceanv1.geojson", "coordinatesOc", "oceanCoord.json")
        land_ring = ring_from_geojson("terrev1.geojson", "coordinatesTr", "terreCoord.json")
        mer_polygon = Polygon(ocean_ring)
        terre_polygon = Polygon(land_ring)

        self.convertHorsColumnToNumber()
        self.getFlooredTime_T(T_minute)
        self.df = self.aggregate_data()

        def zone_of(lat, lon):
            # Points are built as (lat, lon), matching the reversed vertices.
            if terre_polygon.contains(Point(float(lat), float(lon))):
                return "T"
            if mer_polygon.contains(Point(float(lat), float(lon))):
                return "M"
            return "Out"

        start_zones = [zone_of(lat, lon)
                       for lat, lon in zip(self.df["start_lat"], self.df["start_lon"])]
        end_zones = [zone_of(lat, lon)
                     for lat, lon in zip(self.df["LAT"], self.df["LON"])]
        self.df["start_MT"] = start_zones
        self.df["end_MT"] = end_zones
    #Convert a date to a timestamp in seconds so that intervals can be expressed with real-valued (R) bounds.
    def convertDateToSeconds(self, date):
        # Si date est un Timestamp, le convertir en str
        if isinstance(date, pd.Timestamp):
            date = date.strftime("%Y-%m-%d %H:%M:%S")
        date_heure = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
        timestamp_en_secondes = date_heure.timestamp()
        return timestamp_en_secondes

        return timestamp_en_secondes
    # use 'convertDateToSeconds' to convert all rows values
    def convertToSecond(self):
        self.df[f"ds_date_{self.T}_m_ts"]=self.df[f"ds_date_{self.T}_m"].apply(self.convertDateToSeconds)
        self.df[f"DE_DATE_{self.T}_m_ts"]=self.df[f"DE_DATE_{self.T}_m"].apply(self.convertDateToSeconds)
    
    #Apply 'location_MT_prepro' and 'convertToSecond' on the dataFrame for a preprocessing task
    def preprocessing(self,T_minute):
        """Run the whole preprocessing for intervals of *T_minute* minutes.

        Calls ``location_MT_prepro`` (aggregation and zone tagging) and then
        ``convertToSecond`` (interval bounds as timestamps); both update
        ``self.df``.
        """
        # Must run first: it sets self.T and creates the interval columns
        # that convertToSecond reads.
        self.location_MT_prepro(T_minute)
        self.convertToSecond()
        
    # Uses 'interruptions' to split each entire trajectory into sub-trajectories that lie either in
    # the ocean or on land; this function does not return the trajectories, only their index limits.
    def get_traject_limits(self,interruptions):
        trajet_limits={}
        for seal in interruptions.keys():
            trajet_limits[seal]=list()
        for seal in interruptions.keys():
            for indexe in range(len(interruptions[seal])):
                if indexe==0:
                    trajet_limits[seal].append((0,interruptions[seal][indexe][1]))
                else:
                    trajet_limits[seal].append((interruptions[seal][indexe-1][1]+1,interruptions[seal][indexe][1])) 
        return trajet_limits
    
    #This function removes the land trajectories, because the purpose of the study is to find the
    # relation between foraging activities, time and positions or places.
    #"Seals search for prey in the ocean, not on land."
    def remove_ter_trajet(self,phoque_trajets):
        keys_to_remove = []
        for key in phoque_trajets.keys():
            if phoque_trajets[key]["sequence_start_MT"].count('M') < phoque_trajets[key]["sequence_start_MT"].count('T') or len(phoque_trajets[key]["sequence_start_MT"])<3:      
                keys_to_remove.append(key)
        for key_to_r in keys_to_remove:
            phoque_trajets.pop(key_to_r)

        return phoque_trajets
    
    # This function organizes the data as a dictionary of (entire) trajectories keyed by
    # identifier: each seal is a dictionary of paths, which facilitates the
    # extraction of sub-trajectories used to build the characteristic structure.
    def getPhoque_trajets(self):
        uniqueIds=self.seal_members
        dataOrgnized={}
        for item in uniqueIds:
          dataOrgnized[item]={}
        for key in dataOrgnized.keys():
          dataOrgnized[key]["De_DATE"]=list()
          dataOrgnized[key]["Ds_DATE"]=list()
          dataOrgnized[key]["start_lat"]=list()
          dataOrgnized[key]["end_lat"]=list()
          dataOrgnized[key]["start_lon"]=list()
          dataOrgnized[key]["end_lon"]=list()
          dataOrgnized[key]["start_MT"]=list()
          dataOrgnized[key]["end_MT"]=list()
        #   dataOrgnized[key]["classe_jour_nuit"]=list()
          dataOrgnized[key]["activity"]=list()


        for index,row in self.df.iterrows():
          dataOrgnized[row['SEAL']]["De_DATE"].append(row[f"DE_DATE_{self.T}_m_ts"])
          dataOrgnized[row['SEAL']]["Ds_DATE"].append(row[f"ds_date_{self.T}_m_ts"])
          dataOrgnized[row['SEAL']]["start_lat"].append(row["start_lat"])
          dataOrgnized[row['SEAL']]["end_lat"].append(row["LAT"])
          dataOrgnized[row['SEAL']]["start_lon"].append(row["start_lon"])
          dataOrgnized[row['SEAL']]["end_lon"].append(row["LON"])
          dataOrgnized[row['SEAL']]["start_MT"].append(row["start_MT"])
          dataOrgnized[row['SEAL']]["end_MT"].append(row["end_MT"])
        #   dataOrgnized[row['SEAL']]["classe_jour_nuit"].append(row["classe_jour_nuit"])
          dataOrgnized[row['SEAL']]["activity"].append(row["activities"])
            
        seals=[]    # Dans cette variable on va stocker our data sous la forme suivante:
            #une liste des dictionnaires et chaque dictionnaire represente l'ensemble des activites d'une
            #phoque avec autres informations sont lister en dessous.cette partie a pour objectif seulement
            #de regrouper les lignes de chaque phoque
        for item in uniqueIds:
            chemin_start_time=[]
            chemin_end_time=[]
            chemin_start_lat=[]
            chemin_end_lat=[]
            chemin_start_lon=[]
            chemin_end_lon=[]
            chemin_start_MT = []
            chemin_end_MT = []
        #     chemin_JN = []
            chemin_activities = []
            for i in range(len(dataOrgnized[item]["start_lat"])):
                chemin_start_lat.append(dataOrgnized[item]["start_lat"][i])
                chemin_end_lat.append(dataOrgnized[item]["end_lat"][i])
                chemin_start_lon.append(dataOrgnized[item]["start_lon"][i])
                chemin_end_lon.append(dataOrgnized[item]["end_lon"][i])
                chemin_start_MT.append(dataOrgnized[item]["start_MT"][i])
                chemin_end_MT.append(dataOrgnized[item]["end_MT"][i])
                chemin_start_time.append(dataOrgnized[item]["Ds_DATE"][i])
                chemin_end_time.append(dataOrgnized[item]["De_DATE"][i])
        #         chemin_JN.append(dataOrgnized[item]["classe_jour_nuit"][i])
                chemin_activities.append(dataOrgnized[item]["activity"][i])

            seals.append({item:{"sequence_start_time":chemin_start_time,
                                "sequence_end_time" : chemin_end_time,
                                "sequence_start_lat" : chemin_start_lat,
                                "sequence_end_lat" :chemin_end_lat,
                                "sequence_start_lon" :chemin_start_lon,
                                "sequence_end_lon" :chemin_end_lon,
                                "sequence_start_MT" : chemin_start_MT ,
                                "sequence_end_MT" : chemin_end_MT ,
        #                         "classe_jour_nuit" : chemin_JN ,
                                "sequence_activities " :chemin_activities}})
        li_k=[]
        for i in range(14):
            li_k.append(list(seals[i].keys()))
        li_k,seals[0]["V13"].keys()
        interruptions={}
        for j  in range(14):
            interruptions[li_k[j][0]]=[]   # chaque ID a une liste vide. exemple--> v13:[]
        for seal_dict in seals:
            for seal in seal_dict:
                start_MT = seal_dict[seal]["sequence_start_MT"]
                end_MT   = seal_dict[seal]["sequence_end_MT"]
                for i in range(len(start_MT)):
                    if i>0 :
                        if end_MT[i] != end_MT[i-1]:
                            interruptions[seal].append((i-1,i))
                        elif end_MT[i] == end_MT[i-1]:
                            pass
                        else:
                            print("Error")

        limits = self.get_traject_limits(interruptions)
        phoque_trajets={}
        i=0
        for seal in limits.keys():
            for indice,limit in enumerate(limits[seal]):
                phoque_trajets[f"{seal}_{indice}"]={
                   "sequence_start_time": seals[i][seal]["sequence_start_time"][limit[0]:limit[1]] ,  
                    "sequence_end_time" : seals[i][seal]["sequence_end_time"][limit[0]:limit[1]],
                    "sequence_start_lat": seals[i][seal]["sequence_start_lat"][limit[0]:limit[1]],
                      "sequence_end_lat": seals[i][seal]["sequence_end_lat"][limit[0]:limit[1]],
                    "sequence_start_lon": seals[i][seal]["sequence_start_lon"][limit[0]:limit[1]],
                      "sequence_end_lon": seals[i][seal]["sequence_end_lon"][limit[0]:limit[1]],
                     "sequence_start_MT": seals[i][seal]["sequence_start_MT"][limit[0]:limit[1]],
                       "sequence_end_MT": seals[i][seal]["sequence_end_MT"][limit[0]:limit[1]],
        #                "classe_jour_nuit": seals[i][seal]["classe_jour_nuit"][limit[0]:limit[1]],
                   "sequence_activities": seals[i][seal]["sequence_activities "][limit[0]:limit[1]]
                }
            i+=1
        phoque_trajets=self.remove_ter_trajet(phoque_trajets)
        return phoque_trajets
    
    def getPhoque_trajetsVwithLand(self):
        uniqueIds=self.seal_members
        dataOrgnized={}
        for item in uniqueIds:
          dataOrgnized[item]={}
        for key in dataOrgnized.keys():
          dataOrgnized[key]["De_DATE"]=list()
          dataOrgnized[key]["Ds_DATE"]=list()
          dataOrgnized[key]["start_lat"]=list()
          dataOrgnized[key]["end_lat"]=list()
          dataOrgnized[key]["start_lon"]=list()
          dataOrgnized[key]["end_lon"]=list()
          dataOrgnized[key]["activity"]=list()


        for index,row in self.df.iterrows():
          dataOrgnized[row['SEAL']]["De_DATE"].append(row[f"DE_DATE_{self.T}_m_ts"])
          dataOrgnized[row['SEAL']]["Ds_DATE"].append(row[f"ds_date_{self.T}_m_ts"])
          dataOrgnized[row['SEAL']]["start_lat"].append(row["start_lat"])
          dataOrgnized[row['SEAL']]["end_lat"].append(row["LAT"])
          dataOrgnized[row['SEAL']]["start_lon"].append(row["start_lon"])
          dataOrgnized[row['SEAL']]["end_lon"].append(row["LON"])
          dataOrgnized[row['SEAL']]["activity"].append(row["activities"])
            
        seals=[]    # Dans cette variable on va stocker our data sous la forme suivante:
            #une liste des dictionnaires et chaque dictionnaire represente l'ensemble des activites d'une
            #phoque avec autres informations sont lister en dessous.cette partie a pour objectif seulement
            #de regrouper les lignes de chaque phoque
        for item in uniqueIds:
            chemin_start_time=[]
            chemin_end_time=[]
            chemin_start_lat=[]
            chemin_end_lat=[]
            chemin_start_lon=[]
            chemin_end_lon=[]
        #     chemin_JN = []
            chemin_activities = []
            for i in range(len(dataOrgnized[item]["start_lat"])):
                chemin_start_lat.append(dataOrgnized[item]["start_lat"][i])
                chemin_end_lat.append(dataOrgnized[item]["end_lat"][i])
                chemin_start_lon.append(dataOrgnized[item]["start_lon"][i])
                chemin_end_lon.append(dataOrgnized[item]["end_lon"][i])
                chemin_start_time.append(dataOrgnized[item]["Ds_DATE"][i])
                chemin_end_time.append(dataOrgnized[item]["De_DATE"][i])
        #         chemin_JN.append(dataOrgnized[item]["classe_jour_nuit"][i])
                chemin_activities.append(dataOrgnized[item]["activity"][i])

            seals.append({item:{"sequence_start_time":chemin_start_time,
                                "sequence_end_time" : chemin_end_time,
                                "sequence_start_lat" : chemin_start_lat,
                                "sequence_end_lat" :chemin_end_lat,
                                "sequence_start_lon" :chemin_start_lon,
                                "sequence_end_lon" :chemin_end_lon,
                                "sequence_activities" :chemin_activities}})
        phoque_trajets={}
#         i=0
        for index in range(len(seals)):
            phoque_trajets[list(seals[index].keys())[0]]=seals[index][list(seals[index].keys())[0]]
            
        return phoque_trajets

    
    # build characteristic structure in the form: (Tmin,Tmax):(((Xs,Ys),(Xe,Ye)),{a1,a2,...,an})
    def build_data(self,trject_type='T'):
        if trject_type=='MT':
            dataDict=self.getPhoque_trajets()
        elif trject_type=='T':
            dataDict=self.getPhoque_trajetsVwithLand()
        data={}
        for key in dataDict.keys():
            if len(dataDict[key]["sequence_activities"])>10:
                data[key]={}
        for key in dataDict.keys():
            if len(dataDict[key]["sequence_activities"])>10:
                for i in range(len(dataDict[key]["sequence_activities"])):
                    data[key][(dataDict[key]["sequence_start_time"][i],dataDict[key]["sequence_end_time"][i])]=(((dataDict[key]["sequence_start_lon"][i],dataDict[key]["sequence_start_lat"][i]),(dataDict[key]["sequence_end_lon"][i],dataDict[key]["sequence_end_lat"][i])),{dataDict[key]["sequence_activities"][i]})          

        return data




In [108]:
# Load the seal dataset; the constructor parses the date columns and sorts the rows by date.
prepro = Preprocessing(path="../GALACTIC Analysis/datasets/dataset_A_P.csv")

In [109]:
# Aggregate the data on 120-minute intervals, tag the zones and add the timestamp columns,
# then preview the resulting dataframe.
prepro.preprocessing(120)
prepro.df.head()

  self.df[f"ds_date_{self.T}_m"] = self.df["ds_date"].dt.floor(f'{T_minute}T')
  self.df[f"DE_DATE_{self.T}_m"] = self.df["DE_DATE"].dt.floor(f'{T_minute}T')+pd.Timedelta(minutes=T_minute)


Unnamed: 0,SEAL,ds_date_120_m,DE_DATE_120_m,start_lat,LAT,start_lon,LON,activities,strat_heur,end_heur,ds_date,DE_DATE,start_MT,end_MT,ds_date_120_m_ts,DE_DATE_120_m_ts
0,V13,2020-10-13 16:00:00,2020-10-13 18:00:00,49.377463,49.37686,-1.156893,-1.15726,OR,16.4,16.583333,2020-10-13 16:24:00,2020-10-13 16:35:00,M,M,1602598000.0,1602605000.0
1,V13,2020-10-13 18:00:00,2020-10-13 22:00:00,49.375681,49.37581,-1.158076,-1.15801,OR,19.966667,20.0,2020-10-13 19:58:00,2020-10-13 20:00:00,M,M,1602605000.0,1602619000.0
2,V13,2020-10-13 20:00:00,2020-10-13 22:00:00,49.375907,49.38651,-1.157957,-1.15517,OR,20.016667,21.983333,2020-10-13 20:01:00,2020-10-13 21:59:00,M,M,1602612000.0,1602619000.0
3,V13,2020-10-13 22:00:00,2020-10-14 02:00:00,49.386607,49.39419,-1.155226,-1.1597,OR,22.016667,0.083333,2020-10-13 22:01:00,2020-10-14 00:05:00,M,M,1602619000.0,1602634000.0
4,V13,2020-10-14 00:00:00,2020-10-14 02:00:00,49.394224,49.40113,-1.159716,-1.16379,OR,0.1,1.983333,2020-10-14 00:06:00,2020-10-14 01:59:00,M,T,1602626000.0,1602634000.0


In [110]:
phoque_trajets = prepro.build_data("T") # 'T' for the entire trajectory of each seal,
                                        # 'MT' to extract the marine sub-trajectories

In [111]:
phoque_trajets["V13"] # or phoque_trajets["V13_<i>"] (i = sub-trajectory index) when built with 'MT'

{(1602597600.0, 1602604800.0): (((-1.156893, 49.377463), (-1.15726, 49.37686)),
  {'OR'}),
 (1602604800.0, 1602619200.0): (((-1.158076, 49.375681), (-1.15801, 49.37581)),
  {'OR'}),
 (1602612000.0, 1602619200.0): (((-1.157957, 49.375907), (-1.15517, 49.38651)),
  {'OR'}),
 (1602619200.0, 1602633600.0): (((-1.155226, 49.386607), (-1.1597, 49.39419)),
  {'OR'}),
 (1602626400.0, 1602633600.0): (((-1.159716, 49.394224), (-1.16379, 49.40113)),
  {'OR'}),
 (1602633600.0, 1602648000.0): (((-1.163816, 49.401179), (-1.16816, 49.40855)),
  {'OR'}),
 (1602640800.0, 1602655200.0): (((-1.168183, 49.408589), (-1.17247, 49.41586)),
  {'OR'}),
 (1602648000.0, 1602655200.0): (((-1.172488, 49.415894), (-1.16859, 49.41277)),
  {'OR,UR'}),
 (1602655200.0, 1602662400.0): (((-1.159883, 49.391195), (-1.15841, 49.37356)),
  {'OR'}),
 (1602662400.0, 1602669600.0): (((-1.157261, 49.373969), (-1.15512, 49.38047)),
  {'OR'}),
 (1602684000.0, 1602698400.0): (((-1.154261, 49.377769), (-1.15231, 49.37876)),
  {'OR'}

In [None]:
# This is only to extract a sample of trajectories based on their length

In [122]:
# Keep only the first 10 trajectories (in dictionary order) as a smaller sample.
i=0
phoque_trajetsv1=dict()
for item in phoque_trajets:
    # The loop still visits every key; entries are copied only while fewer than 10 were taken.
    if i<10:
        phoque_trajetsv1[item]=phoque_trajets[item]
        i+=1

In [128]:
# from FCA_sequence import Spatio_temporal_sequences
import time
# Start timing the whole pipeline: context construction, concept generation, result table.
start_time = time.time()
sts = Spatio_temporal_sequences(data=phoque_trajets,cardinality=2,treshold_meter=5000)
# cs = sts.commun_sequences(data["person_bleu"],data["person_noir"],"person_bleu",["person_noir"],treshold_meter=2)
# css= sts.getCommunsequences(treshold=5)
context=sts.getContext()

fca=FCAsequence(context)
concept = fca.generate_concepts()

concept_table=sts.build_data_frame_result(concept)

# Mark the end of the timing
end_time = time.time()

# Compute the execution time
execution_time = end_time - start_time
print(f"Temps d'exécution : {execution_time:.4f} secondes")
concept_table.head()

KeyboardInterrupt: 

In [126]:
# Print every concept: the group of seals and the subsequences they have in common.
# .iloc makes the positional access explicit; integer keys on a labelled
# Series (row[0], row[1]) are deprecated and raise a FutureWarning.
for index,row in concept_table.iterrows():
    print("Groupe :",row.iloc[0],"Common subsequences :",row.iloc[1])

Group : {'V16', 'V19', 'V20', 'V14', 'V24', 'V15', 'V17', 'V21', 'V18', 'V13'} Common subsequences : set()
Group : {'V16', 'V19', 'V14', 'V18', 'V15', 'V13'} Common subsequences : {"(1606820400.0, 1606827600.0):{'OR'}"}
Group : {'V17', 'V16', 'V14', 'V18', 'V15', 'V13'} Common subsequences : {"(1604228400.0, 1604235600.0):{'OR'}"}
Group : {'V17', 'V16', 'V19', 'V14', 'V15', 'V13'} Common subsequences : {"(1604494800.0, 1604502000.0):{'OR'}"}
Group : {'V17', 'V19', 'V14', 'V18', 'V15', 'V13'} Common subsequences : {"(1602748800.0, 1602756000.0):{'OR'}", "(1602770400.0, 1602777600.0):{'OR'}"}
Group : {'V17', 'V16', 'V19', 'V14', 'V18'} Common subsequences : {"(1603375200.0, 1603382400.0):{'OR'}"}
Group : {'V17', 'V16', 'V19', 'V15', 'V13'} Common subsequences : {"(1604667600.0, 1604674800.0):{'OR'}", "(1604494800.0, 1604502000.0):{'OR'}"}
Group : {'V17', 'V16', 'V14', 'V15', 'V13'} Common subsequences : {"(1604494800.0, 1604502000.0):{'OR'}", "(1604228400.0, 1604235600.0):{'OR'}"}
Group 

  print("Group :",row[0],"Common subsequences :",row[1])


In [113]:
from itertools import chain, combinations
import ast
class FCAsequence():
    def __init__(self,context):
        """Store the formal context.

        *context* is used as a mapping from each object to its set of
        attributes: ``closure`` intersects these sets and calls
        ``issuperset`` on them.
        """
        self.context = context
        
#     def powerset(self,iterable): # cette fonction a pour objectif de retourner les combinaisons possibles a
#         s = list(iterable)  # partir d'un ensemble des objets. Il commence par ∅ jusqu'a l'ensemble de tous les elements       
#         return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
    def powerset(self, iterable):
        s = list(iterable)
        return chain.from_iterable(combinations(s, r) for r in range(len(s), -1, -1))

    def closure(self, objects): # cette fonction a pour objectif de retourner les objects qui possedent
                                # des attributs communs(les attributes communs inclu dans context) 
        attributes = set.intersection(*(self.context[obj] for obj in objects))  
        extent = {obj for obj in self.context if self.context[obj].issuperset(attributes)} # ce ligne est tres important
                                        # il permet de retourner tout les objets qui contenant les attributs
                                        # 'attributs' c-a-d l'ensemble des attributs le plus grand qui 
        return extent, attributes       # contient les attributs voir l'exemple de [9] et [0,4]
                                        # produit des concepts fermés
    def IncludeIn(self,I=tuple,L=tuple,S1=set,S2=set):
        if I[0]>=L[0] and I[1]<=L[1] and S1.issubset(S2) :
            return True
        else:
            return False
    def isSubCara(self,cara1,cara2):
        for c2 in set(cara2):
            isRedondant=True
            for c1 in set(cara1):
                I,S = c1.split(":")[0],c1.split(":")[1]
                Is,Ss = c2.split(":")[0],c2.split(":")[1]
                I,S=ast.literal_eval(I),ast.literal_eval(S)
                Is,Ss=ast.literal_eval(Is),ast.literal_eval(Ss)
                if self.IncludeIn(I,Is,S,Ss):
                    isRedondant=True
                else:
                    isRedondant=False
                    break
            if isRedondant==True:
                return True
        return False
    def isRedondant(self,d,e):
        g,c=e
        g,c= set(g),set(c)
        for gs,cs in d:
            if g.issubset(set(gs)): # ou niveau des individus
                if self.isSubCara(c,cs):
                    return True
        return False
    def brushCaract(self,fset):
        intervalles = list(fset)
        result = frozenset()

        while intervalles:
            intervalle = intervalles.pop()
            merged = set([intervalle])

            # Extraire les bornes et les caractéristiques
            bornes, caracteristique_str = intervalle.split(":")
            borne_inf, borne_sup = map(eval, bornes.strip("()").split(","))

            # Convertir la caractéristique en ensemble
            caracteristique = set(caracteristique_str.strip("{}").split(","))

            for autre in intervalles[:]:
                autres_bornes, autres_caracteristique_str = autre.split(":")
                autres_borne_inf, autres_borne_sup = map(eval, autres_bornes.strip("()").split(","))
                autres_caracteristique = set(autres_caracteristique_str.strip("{}").split(","))

                # Vérification de la fusion
                if caracteristique == autres_caracteristique:
                    if borne_inf <= autres_borne_sup and borne_sup >= autres_borne_inf:
                        merged.add(autre)
                        intervalles.remove(autre)

            # Ajouter le résultat fusionné
            if len(merged) > 1:
                min_borne = min(borne_inf, autres_borne_inf)
                max_borne = max(borne_sup, autres_borne_sup)
                result = result.union(frozenset({f"({min_borne}, {max_borne}):{{{','.join(caracteristique)}}}" }))
            else:
                result = result.union(merged)

        return frozenset(result)
    
    def generate_concepts(self):
        objects = list(self.context.keys())
        all_concepts = set()
        for obj_subset in self.powerset(objects):
#             print(obj_subset)
            if obj_subset:
                extent, intent = self.closure(obj_subset)
                concept = (frozenset(extent), self.brushCaract(frozenset(intent)))
#                 print(concept," not In\n",all_concepts,"\n___________")
                if self.isRedondant(all_concepts,concept)==False:
                    all_concepts.add(concept)
#                 else:
#                     print(concept)
        return sorted(all_concepts, key=lambda x: (len(x[1]), len(x[0])))

import math
from itertools import chain, combinations
import pandas as pd


class Spatio_temporal_sequences():
    def __init__(self,data,cardinality,treshold_meter=0):
        self.data=data
        self.cardinality=cardinality
        self.treshold_meter=treshold_meter
        self.memberes=list(data.keys())
    
    
    def Haversine_distance(self,point1, point2): #distance orthodromique (ou formule de Haversine)
        lat1, lon1=point1
        lat2, lon2=point2

        # Rayon de la Terre en kilomètres
        r = 6371.0

        # Conversion des degrés en radians
        lat1 = math.radians(lat1)
        lon1 = math.radians(lon1)
        lat2 = math.radians(lat2)
        lon2 = math.radians(lon2)

        # Différences de latitude et de longitude
        dlat = lat2 - lat1
        dlon = lon2 - lon1

        # Formule de Haversine
        a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

        # Distance en metres
        distance = r * c
        return distance*1000

    def euclidean_distance(self,a,b):
        lat1, lon1=a
        lat2, lon2=b
        return math.sqrt((lat2-lat1)**2+(lon2-lon1)**2)
    
    def intersection_intervals(self,interval1, interval2):
        start = max(interval1[0], interval2[0])
        end = min(interval1[1], interval2[1])

        if start < end:
            return (start, end)
        else:
            return None
    def transform_data_if_needed(self,inner_dict):
        new_inner_dict = {}

        for inner_key, value in inner_dict.items():
            combined_set = set()
            associated_tuples = []

            # Normaliser la structure en une liste de tuples pour traitement uniforme
            values_to_process = value if isinstance(value, list) else [value]

            for item in values_to_process:
                if isinstance(item, tuple) and len(item) == 2:
                    associated_tuples.append(item[0])
                    combined_set.update(item[1])

            new_inner_dict[inner_key] = (tuple(associated_tuples[0]), combined_set)

        return new_inner_dict
    def isAfterInterval(self,interval1,interval2):
        x,y=interval1
        x1,y1=interval2
        if x1>y:
            return True
        else:
            return False
    def commun_sequences(self,sequence1, sequence2,id1,id2=list,treshold_meter=0):
    
        sequence2 = self.transform_data_if_needed(sequence2)
        ids={id1,*id2}
        idsl=list(ids)
        idsl.sort()
        matched_person={tuple(idsl):{}}
        sequence_commun={}
        for interval1 in sequence1:
            for interval2 in sequence2:
                if self.isAfterInterval(interval1=interval1,interval2=interval2)==True:
                    break
                if self.intersection_intervals(interval1, interval2): #cette partie doit etre optimisee
                    
                    if (self.Haversine_distance(sequence1[interval1][0][0],sequence2[interval2][0][0])+self.Haversine_distance(sequence1[interval1][0][1],sequence2[interval2][0][1]))<=treshold_meter:
    #                     print(sequence1[interval1][1])
                        if sequence1[interval1][1] & sequence2[interval2][1]: #ici le travail
                            if len(list(sequence1[interval1][1] & sequence2[interval2][1]))>1:
                                sequence_commun[self.intersection_intervals(interval1, interval2)]=list()
                                for elm in list(sequence1[interval1][1] & sequence2[interval2][1]):
                                    sequence_commun[self.intersection_intervals(interval1, interval2)].append(
                                                                                                            ( ( ((sequence1[interval1][0][0][0]+sequence2[interval2][0][0][0])/2,(sequence1[interval1][0][0][1]+sequence2[interval2][0][0][1])/2),
                                                                                                                (((sequence1[interval1][0][1][0]+sequence2[interval2][0][1][0])/2,(sequence1[interval1][0][1][1]+sequence2[interval2][0][1][1])/2))   
                                                                                                              ),
                                                                                                              {elm}
                                                                                                            )
                                                                                                        )         
                            else:
                                sequence_commun[self.intersection_intervals(interval1, interval2)]=[( ( ((sequence1[interval1][0][0][0]+sequence2[interval2][0][0][0])/2,(sequence1[interval1][0][0][1]+sequence2[interval2][0][0][1])/2),
                                                                                                                (((sequence1[interval1][0][1][0]+sequence2[interval2][0][1][0])/2,(sequence1[interval1][0][1][1]+sequence2[interval2][0][1][1])/2))   
                                                                                                              ),
                                                                                                              sequence1[interval1][1] & sequence2[interval2][1]
                                                                                                            )]   
                            idsl=list(ids)
                            idsl.sort()
                            matched_person[tuple(idsl)]= sequence_commun
        if len(list(matched_person[tuple(idsl)])) ==0:
            return None
        else:
            return matched_person
    
    def powerset(self,iterable): 
        s = list(iterable)         
        return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
    def getGroupsMore_N_element(self,cardinality):#pour creer des combinison des cles de sequence
        data=self.data
        groups={}
        for i in range(cardinality,len(list(data.keys()))+1): #initialisation de dictionnaire des combinaisons (cle:[])
            groups[len(list(data.keys()))+cardinality-i]=list() # sachant que la cle represent la cardinalite des groupes
                                              # c-a-d les groupes des listes de 2 elements de 3,4 etc

        for obj in self.powerset(list(data.keys())):
            if len(list(obj))>=cardinality:
                groups[len(list(obj))].append(set(obj))
        return groups
    def getCommunsequences(self):
        treshold=self.treshold_meter
        data=self.data
        groups=self.getGroupsMore_N_element(cardinality=self.cardinality)
        results={}
        for keyGroup in groups:
            results[keyGroup]=list()

        for keyGroup in groups: 
            for group in groups[keyGroup]:
                liste=list(group)
                liste.sort()
                result=None
                result =self.commun_sequences(data[liste[0]],data[liste[1]],liste[0],[liste[1]],treshold_meter=treshold) #common subsequence between first and second sequence           
                for i in range(1,len(liste)):
                    if result!=None and liste[i] not in list(list(result)[0]): # le 2eme condi pour le cas de repetition de comparaison 
    #                     print(list(result)[0],liste[i])
                        result =self.commun_sequences(data[liste[i]],result[list(result)[0]],liste[i],list(list(result)[0]),treshold_meter=treshold)
                        if result==None:
                            break
                if result :
                    results[keyGroup].append(result)

        deleted_keys=[]
        for key in results:
            if len(results[key])==0:
                deleted_keys.append(key)
        for key in deleted_keys:
            del results[key]
        return results
    
    def dropLocationPart(self):
        clusters=self.getCommunsequences()
        new_clusters=dict()
        for cardinality in clusters:
            new_clusters[cardinality]=list()
            group_key=dict()
    #         print(cardinality)
            for group in clusters[cardinality]:
                group_key[list(group.keys())[0]]=dict()
    #             print(list(group.keys())[0])
                for key in group:
                    for interval_time in group[key]:
                        for item in group[key][interval_time]:
                            group_key[key][interval_time]=[item[1]]
            new_clusters[cardinality].append(group_key)
        return new_clusters
    def getAttributs(self): #extract attributs from clusters of groups and commun subsequences to create context
        clusters=self.dropLocationPart()
        attributes=[]               
        for cardinality in clusters:
            for group in clusters[cardinality]:
                for key in group:
                    for item in group[key]:
    #                     print(item,"-",group[key][item])
    #                     gk = list(item)[0] # time interval
    #                     if {gk:group[key][gk][0]} not in attributes:
    #                     if {item:group[key][item]} not in attributes:
                        if len(group[key][item])>1: #cette condition peux creer des repititions on va les elminer avec un autre condition        
                            for el in group[key][item]:
                                if {item:el} not in attributes:
                                    attributes.append({item:el})
                        else:
                            if {item:group[key][item][0]} not in attributes:
                                attributes.append({item:group[key][item][0]})

        return attributes
    def getContext_table(self):
        memberes= list(self.data.keys())
        resultats=self.dropLocationPart()
        caracteristiques=self.getAttributs()
        columns=[]
        for caracteristique in caracteristiques:
            for item in caracteristique:
                columns.append(f"{item}:{caracteristique[item]}")
        df = pd.DataFrame(0, index=memberes, columns=columns)

        # Remplir le DataFrame
        for cardi in resultats:
            for item in resultats[cardi]:
                for groupe, caract in item.items():
                    for member in groupe:
                        for car in caract:
        #                     print(f"{car}:{caract[car]}")
                            if len(caract[car])>=2:
                                for e in caract[car]:
                                    print(f"{car}:{e,member}")
                                    col_name = f"{car}:{e}"
        #                             print(col_name)
                                    df.at[member, col_name] = 1
                            else:
                                col_name = f"{car}:{caract[car][0]}"
                                df.at[member, col_name] = 1
        return df
    def getContext(self):
        Binary_dataframe=self.getContext_table()
        context = dict()
        for index,row in Binary_dataframe.iterrows():
            attributs = []
            for i in range(len(row)):
                if row[i]==1:
                    attributs.append(list(Binary_dataframe.columns)[i])
            context[index]=set(attributs)    
        return context
    def get_liste_interval(self,description):
        liste_intervals=[]
        for item in description:
            liste_intervals.append(ast.literal_eval(item.split(":")[0]))
        return liste_intervals
    def moyenne_coordonnees(self,coordonnees):
        # Initialiser les variables pour stocker la somme des coordonnées et le nombre de points
        somme_x = 0
        somme_y = 0
        nombre_points = 0

        # Parcourir chaque paire de coordonnées et chaque point
        for paire in coordonnees:
            for point in paire:
                somme_x += point[0]
                somme_y += point[1]
                nombre_points += 1

        # Calculer les moyennes
        moyenne_x = somme_x / nombre_points
        moyenne_y = somme_y / nombre_points

        return moyenne_x, moyenne_y
    def get_centers_locations(self,sorted_results):
        """Compute averaged locations for the intervals named in each intent.

        Args:
            sorted_results: iterable of ``(intent, extent)`` pairs; the intent
                items are parsed with ``get_liste_interval``.

        Returns:
            list: one entry per distinct ``tuple(extent)``. An entry is
            ``None`` when the intent is empty, otherwise a list holding one
            dict per interval of the intent. Each dict is keyed by the
            overlap intervals that were found; only the key of the last
            overlap carries the averaged coordinates, the other keys keep
            ``None``.
        """
        data=self.data
        concepts=dict()
        # Step 1: for every selected interval, gather
        # (overlap, individual, location) triples for the members of the extent.
        for intent, extent in sorted_results:
            if intent:
                # Two pairs with the same extent share a key; the later one
                # resets the list.
                concepts[tuple(extent)]=list()
                for interval_selected in self.get_liste_interval(intent):
                    liste_person_location=list()
                    for individual in data:
                        for interval in data[individual]:
                            inter = self.intersection_intervals(interval,interval_selected)
                            if inter and individual in extent:
                                # Index 0 of the stored value is the location part.
                                liste_person_location.append((inter,individual,data[individual][interval][0]))
                    concepts[tuple(extent)].append(liste_person_location)

            else:
                concepts[tuple(extent)]=None  
        person_location_centers=dict()

        # Step 2: average the collected locations of every subgroup.
        for group in concepts:
            if concepts[group]:
                person_location_centers[group]=list()
                for subgroup in concepts[group]:
                    subgroup_locations=dict()
                    liste_locations=[]
                    for triple in subgroup:
                        key_time=triple[0]
                        subgroup_locations[key_time]=None
                        liste_locations.append(triple[2])
                    # NOTE(review): an empty subgroup reaches moyenne_coordonnees
                    # with an empty list, which divides by zero. key_time is
                    # whatever the last triple set - confirm this is intended.
                    subgroup_locations[key_time]=self.moyenne_coordonnees(liste_locations)
                    person_location_centers[group].append(subgroup_locations)
            else:
                person_location_centers[group]=None
        # Only the values are returned, in the insertion order of the groups.
        liste_time=[]
        for group in person_location_centers:
            liste_time.append(person_location_centers[group])
        return liste_time
    def sort_result(self,concept):
        results = []

        for extent, intent in concept:
            results.append((set(intent), set(extent)))

        # Trier les résultats par le nombre d'individus (taille de l'ensemble d'individus) en ordre décroissant
        sorted_results = sorted(results, key=lambda x: len(x[1]), reverse=True)
        return sorted_results
    def build_data_frame_result(self,concept):
        sorted_results=self.sort_result(concept)
        intents=[]
        extents=[]
        # # Afficher les résultats triés
        # for intent, extent in sorted_results:
        #     print(f"Predicats: {intent} \n Individuals: {extent}\n")
        for intent, extent in sorted_results:
            intents.append(intent)
            extents.append(extent)
        concept_table=pd.DataFrame()
        concept_table["Extent"]=extents
        concept_table["Intent"]=intents
        return concept_table


fca = FCAsequence(context)
concept = fca.generate_concepts()

# Collect the concepts as (intent, extent) pairs of plain sets
results = [(set(intent), set(extent)) for extent, intent in concept]

# Order the pairs by number of individuals (extent size), largest first
sorted_results = sorted(results, key=lambda pair: len(pair[1]), reverse=True)

# Display the ordered pairs
for intent, extent in sorted_results:
    print(f"Predicats: {intent} \n Individuals: {extent}\n")


Predicats: set() 
 Individuals: {'V13_48', 'V13_70', 'V13_90', 'V13_76', 'V13_20', 'V13_56', 'V14_2', 'V14_20', 'V13_42', 'V13_0', 'V13_62', 'V13_6', 'V13_36', 'V13_38', 'V13_92', 'V13_72', 'V14_8', 'V13_4', 'V13_40', 'V14_10'}



In [1]:
# Context literal: each person key maps to a set of attribute strings written
# as "(start, end):{'label'}".
context={'person1': {"(1, 1.5):{'A'}",
                    "(1, 2):{'A'}",
                    "(1, 2):{'A1'}",
                    "(1.5, 2):{'A'}",
                    "(1.5, 2):{'A1'}",
                    "(10, 11):{'D'}",
                    "(13, 13.9):{'E'}",
                    "(13, 13.9):{'E2'}",
                    "(16.99, 17):{'F'}",
                    "(7, 8):{'C'}"},
        'person2': {"(1.5, 2):{'A'}",
                    "(1.5, 2):{'A1'}",
                    "(10.5, 11):{'D1'}",
                    "(13, 13.9):{'E'}",
                    "(13, 13.9):{'E2'}",
                    "(13, 14):{'E'}",
                    "(14.99, 15):{'F'}",
                    "(14.99, 15):{'G'}",
                    "(7, 8):{'C'}"},
        'person3': {"(1, 2):{'AA'}",
                    "(10, 11):{'D'}",
                    "(6, 8):{'C1'}",
                    "(6, 8.4):{'C1'}",
                    "(9, 11):{'D'}"},
        'person4': {"(1, 1.5):{'A'}",
                    "(1, 1.5):{'A2'}",
                    "(10.5, 11):{'D1'}",
                    "(11.5, 13.9):{'E'}",
                    "(11.5, 13.9):{'E1'}",
                    "(13, 13.9):{'E'}",
                    "(13, 13.9):{'E1'}",
                    "(13, 14):{'E'}",
                    "(16.99, 17):{'F'}",
                    "(16.99, 18):{'F'}",
                    "(2, 3):{'B2'}",
                    "(7, 8):{'C2'}"},
        'person5': {"(1, 1.5):{'A'}",
                    "(1, 1.5):{'A2'}",
                    "(1, 2):{'A'}",
                    "(1.5, 2):{'A'}",
                    "(13, 13.9):{'E'}",
                    "(13, 13.9):{'E1'}",
                    "(16.99, 17):{'F'}",
                    "(16.99, 18):{'F'}",
                    "(2, 3):{'B2'}",
                    "(6, 8):{'C1'}",
                    "(6, 8.4):{'C1'}"},
        'person6': {"(1, 2):{'AA'}",
                    "(13, 13.9):{'E'}",
                    "(13, 14):{'E'}",
                    "(14.99, 15):{'F'}",
                    "(6, 8):{'C1'}"},
        'person7': {"(1, 1.5):{'A'}",
                    "(1, 2):{'A'}",
                    "(1, 2):{'A1'}",
                    "(1.5, 2):{'A'}",
                    "(1.5, 2):{'A1'}",
                    "(10, 11):{'D'}",
                    "(11.5, 13.9):{'E'}",
                    "(11.5, 13.9):{'E1'}",
                    "(13, 13.9):{'E'}",
                    "(13, 13.9):{'E1'}",
                    "(14.99, 15):{'F'}",
                    "(14.99, 15):{'G'}",
                    "(7, 8):{'C2'}",
                    "(9, 11):{'D'}"}}