In [None]:
import pandas as pd
import numpy as np

# Load the annotated sensor log
df = pd.read_csv("annotated_sensor_data.csv")

# Parse the 'timestamp' column into datetimes
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

# 1. Extract the annotated activities with their duration and the number of
#    "ON"/"OPEN" events recorded while each one was open.

# Timestamps of every "ON"/"OPEN" event, sorted once so that each activity
# window is counted with two binary searches instead of a full scan of df.
active_event_times = (
    df.loc[df["status"].isin(["ON", "OPEN"]), "timestamp"]
    .dropna()
    .sort_values()
    .reset_index(drop=True)
)


def _count_active_events(window_start, window_end):
    """Count "ON"/"OPEN" events with window_start < timestamp < window_end.

    Returns 0 when either bound is missing or when the window is empty or
    inverted, which matches what a direct boolean comparison on df yields.
    """
    if pd.isna(window_start) or pd.isna(window_end):
        return 0
    below_end = active_event_times.searchsorted(window_end, side="left")
    up_to_start = active_event_times.searchsorted(window_start, side="right")
    return max(int(below_end - up_to_start), 0)


def _activity_record(begin_label, window_start, window_end):
    """Build the summary row of one activity spanning window_start..window_end."""
    return {
        'activity': begin_label.replace(' begin', ''),
        'start_time': window_start,
        'end_time': window_end,
        'duration': (window_end - window_start).total_seconds(),
        'sensor_on_count': _count_active_events(window_start, window_end),  # number of active events
    }


activities = []
current_activity = None
start_time = None

# Rows without an annotation never change the state below, so skip them up front.
annotated_rows = df.loc[df['activity'].notna(), ['timestamp', 'activity']]

for timestamp, label in annotated_rows.itertuples(index=False, name=None):
    if 'begin' in label:
        # A new "begin" while another activity is still open closes the open
        # one at this timestamp before the new one starts.
        if current_activity is not None:
            activities.append(_activity_record(current_activity, start_time, timestamp))

        current_activity = label
        start_time = timestamp

    elif 'end' in label and current_activity is not None:
        # Only an "end" whose name matches the open activity closes it;
        # a mismatched "end" marker is ignored.
        if current_activity.replace(' begin', '') == label.replace(' end', ''):
            activities.append(_activity_record(current_activity, start_time, timestamp))
            current_activity = None
            start_time = None

# Note: an activity that is still open when the data ends is not recorded.


In [3]:
# Collect the extracted activity records into a DataFrame and show it
activities_df = pd.DataFrame(data=activities)
print(activities_df)

                      activity                 start_time  \
0                        Sleep 2009-10-16 00:01:06.000046   
1                Bed_to_Toilet 2009-10-16 03:58:14.000016   
2                        Sleep 2009-10-16 03:58:28.000002   
3                       Chores 2009-10-16 08:41:33.000011   
4                 Morning_Meds 2009-10-16 08:41:51.000035   
...                        ...                        ...   
22403  Master_Bedroom_Activity 2010-01-06 12:45:35.000043   
22404                   Chores 2010-01-06 12:47:51.000063   
22405       Dining_Rm_Activity 2010-01-06 12:47:55.000096   
22406               Leave_Home 2010-01-06 12:47:59.000028   
22407                   Chores 2010-01-06 12:48:12.000021   

                        end_time      duration  sensor_on_count  
0     2009-10-16 03:56:50.000008  14143.999962               45  
1     2009-10-16 03:58:26.000021     12.000005                0  
2     2009-10-16 08:41:28.000080  16980.000078              128  
3  

In [4]:
# Segment size: mean of the 'sensor_on_count' column, rounded to an integer
if len(activities_df) > 0:
    moy_sensor = round(activities_df['sensor_on_count'].mean())
    print(f"le nombre moyen de capteur est: { moy_sensor} ")
else:
    # moy_sensor is left unassigned in this branch, so any later use of it
    # raises NameError.
    print("Aucune activité trouvée avec des marqueurs begin/end")

le nombre moyen de capteur est: 8 


In [5]:
# 3. Build event-based segments: consecutive, non-overlapping runs of
#    `moy_sensor` rows of df, each described by its first and last timestamp.
if moy_sensor < 1:
    # A segment size of 0 would otherwise surface as a bare ZeroDivisionError.
    raise ValueError(f"moy_sensor must be at least 1 to build segments, got {moy_sensor}")

# Trailing rows that do not fill a whole segment are left out.
num_segments = len(df) // moy_sensor
covered_rows = num_segments * moy_sensor

# Strided positional slices pick the first / last row of every segment in one
# pass instead of one df.iloc row lookup per segment.
event_times = df['timestamp']
segments_df = pd.DataFrame({
    'segment_id': range(num_segments),
    'start_time': event_times.iloc[0:covered_rows:moy_sensor].reset_index(drop=True),
    'end_time': event_times.iloc[moy_sensor - 1:covered_rows:moy_sensor].reset_index(drop=True),
})

# List-of-dicts view of the same segments, kept so the `segments` name stays available.
segments = segments_df.to_dict('records')


In [6]:
# Show the segment table (one row per segment: id, first and last timestamp)
print(segments_df)

       segment_id                 start_time                   end_time
0               0 2009-10-16 00:01:04.000059 2009-10-16 00:08:55.000040
1               1 2009-10-16 00:24:51.000026 2009-10-16 01:27:34.000002
2               2 2009-10-16 01:27:43.000016 2009-10-16 01:29:05.000061
3               3 2009-10-16 01:29:08.000013 2009-10-16 01:30:51.000058
4               4 2009-10-16 01:32:16.000034 2009-10-16 01:32:46.000035
...           ...                        ...                        ...
53090       53090 2010-01-06 12:47:12.000079 2010-01-06 12:47:43.000054
53091       53091 2010-01-06 12:47:47.000040 2010-01-06 12:47:55.000093
53092       53092 2010-01-06 12:47:55.000095 2010-01-06 12:47:59.000089
53093       53093 2010-01-06 12:48:00.000070 2010-01-06 12:48:14.000046
53094       53094 2010-01-06 12:48:16.000028 2010-01-06 12:48:47.000021

[53095 rows x 3 columns]


In [7]:
import pandas as pd

# Sensor ids that become per-segment count columns (D001-D003, M001-M028)
sensor_list = ["D001", "D002", "D003"] + [f"M{str(i).zfill(3)}" for i in range(1, 29)]

# Only "ON"/"OPEN" events are ever counted below; the status filter does not
# depend on the segment or the activity, so it is applied once here.
active_events = df[df['status'].isin(['ON', 'OPEN'])]

segment_activities = []

# Build one feature row per segment
for _, seg in segments_df.iterrows():
    seg_start = seg['start_time']
    seg_end = seg['end_time']

    # Activities whose time span intersects the segment
    overlapping_activities = activities_df[
        (activities_df['end_time'] > seg_start) &
        (activities_df['start_time'] < seg_end)
    ].copy()

    active_sensor_counts = []
    sensor_redundancy = {sensor: 0 for sensor in sensor_list}

    for _, act in overlapping_activities.iterrows():
        # Active events inside the intersection of activity and segment (bounds inclusive)
        window_start = max(act['start_time'], seg_start)
        window_end = min(act['end_time'], seg_end)
        active_sensors = active_events[
            (active_events['timestamp'] >= window_start) &
            (active_events['timestamp'] <= window_end)
        ]

        # Per-sensor tallies accumulate over every overlapping activity.
        # A sensor id that is not in sensor_list raises KeyError here.
        for sensor, n_events in active_sensors['sensor_id'].value_counts(dropna=False).items():
            sensor_redundancy[sensor] += int(n_events)

        active_sensor_counts.append(active_sensors['sensor_id'].count())

    if len(overlapping_activities) > 0:
        # The dominant activity is the one with the most active events in the
        # segment; idxmax keeps the first one on ties.
        overlapping_activities = overlapping_activities.assign(active_sensor_count=active_sensor_counts)
        dominant_row = overlapping_activities.loc[overlapping_activities['active_sensor_count'].idxmax()]
        dominant_activity = dominant_row['activity']
        dominant_sensor_count = dominant_row['active_sensor_count']
    else:
        dominant_activity = 'No Activity'
        dominant_sensor_count = 0

    segment_duration = seg_end - seg_start

    segment_data = {
        'segment_id': seg['segment_id'],
        'start_time': pd.to_datetime(seg_start).timestamp(),   # POSIX seconds
        'end_time': pd.to_datetime(seg_end).timestamp(),       # POSIX seconds
        'segment_duration': int(segment_duration.total_seconds()),  # truncated to whole seconds
        'dominant_sensor_count': dominant_sensor_count,
        'dominant_activity': dominant_activity,
    }

    # One column per sensor, in sensor_list order
    segment_data.update(sensor_redundancy)

    segment_activities.append(segment_data)

# Final DataFrame
segment_activities_df = pd.DataFrame(segment_activities)

# Order the rows by segment
segment_activities_df = segment_activities_df.sort_values(by=['segment_id', 'start_time', 'end_time'])

# Save to CSV
segment_activities_df.to_csv("segment_activities_finale.csv", index=False)

print(segment_activities_df)


       segment_id    start_time      end_time  segment_duration  \
0               0  1.255651e+09  1.255652e+09               470   
1               1  1.255653e+09  1.255656e+09              3762   
2               2  1.255656e+09  1.255657e+09                82   
3               3  1.255657e+09  1.255657e+09               103   
4               4  1.255657e+09  1.255657e+09                30   
...           ...           ...           ...               ...   
53090       53090  1.262782e+09  1.262782e+09                30   
53091       53091  1.262782e+09  1.262782e+09                 8   
53092       53092  1.262782e+09  1.262782e+09                 3   
53093       53093  1.262782e+09  1.262782e+09                13   
53094       53094  1.262782e+09  1.262782e+09                30   

       dominant_sensor_count        dominant_activity  D001  D002  D003  M001  \
0                          3                    Sleep     0     0     0     0   
1                          3     

In [8]:
# Rebuild the per-segment table from the collected records
segment_activities_df = pd.DataFrame(segment_activities)

# Move the label column 'dominant_activity' to the last position
label_col = 'dominant_activity'
feature_cols = [name for name in segment_activities_df.columns if name != label_col]
segment_activities_df = segment_activities_df[feature_cols + [label_col]]




In [9]:
# Write the reordered table to CSV (without the index column)
output_path = "segment_activities_finale.csv"
segment_activities_df.to_csv(output_path, index=False)

# Show the first 20 rows with every column visible
first_rows = segment_activities_df.head(20)
print(first_rows.to_string())

    segment_id    start_time      end_time  segment_duration  dominant_sensor_count  D001  D002  D003  M001  M002  M003  M004  M005  M006  M007  M008  M009  M010  M011  M012  M013  M014  M015  M016  M017  M018  M019  M020  M021  M022  M023  M024  M025  M026  M027  M028 dominant_activity
0            0  1.255651e+09  1.255652e+09               470                      3     0     0     0     0     0     0     0     0     0     0     0     1     0     0     0     0     0     0     0     0     0     1     1     0     0     0     0     0     0     0     0             Sleep
1            1  1.255653e+09  1.255656e+09              3762                      3     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     3     0     0     0     0     0     0     0     0             Sleep
2            2  1.255656e+09  1.255657e+09                82                      4     0     0     0     0     0     0     0     0     

In [6]:
#methode encodage cyclique 
import pandas as pd
import numpy as np

# Sensor ids that become per-segment count columns (D001-D003, M001-M028)
sensor_list = ["D001", "D002", "D003"] + [f"M{str(i).zfill(3)}" for i in range(1, 29)]

# Only "ON"/"OPEN" events are ever counted below; the status filter does not
# depend on the segment or the activity, so it is applied once here.
active_events = df[df['status'].isin(['ON', 'OPEN'])]


def _cyclic_pair(value, period):
    """Return (sin, cos) of `value` placed on a circle of length `period`."""
    angle = 2 * np.pi * value / period
    return np.sin(angle), np.cos(angle)


segment_activities = []

for _, seg in segments_df.iterrows():
    seg_start = seg['start_time']
    seg_end = seg['end_time']

    # Activities whose time span intersects the segment
    overlapping_activities = activities_df[
        (activities_df['end_time'] > seg_start) &
        (activities_df['start_time'] < seg_end)
    ].copy()

    active_sensor_counts = []
    sensor_redundancy = {sensor: 0 for sensor in sensor_list}

    for _, act in overlapping_activities.iterrows():
        # Active events inside the intersection of activity and segment (bounds inclusive)
        window_start = max(act['start_time'], seg_start)
        window_end = min(act['end_time'], seg_end)
        active_sensors = active_events[
            (active_events['timestamp'] >= window_start) &
            (active_events['timestamp'] <= window_end)
        ]

        # Per-sensor tallies accumulate over every overlapping activity.
        # A sensor id that is not in sensor_list raises KeyError here.
        for sensor, n_events in active_sensors['sensor_id'].value_counts(dropna=False).items():
            sensor_redundancy[sensor] += int(n_events)

        active_sensor_counts.append(active_sensors['sensor_id'].count())

    if len(overlapping_activities) > 0:
        # The dominant activity is the one with the most active events in the
        # segment; idxmax keeps the first one on ties.
        overlapping_activities = overlapping_activities.assign(active_sensor_count=active_sensor_counts)
        dominant_row = overlapping_activities.loc[overlapping_activities['active_sensor_count'].idxmax()]
        dominant_activity = dominant_row['activity']
        dominant_sensor_count = dominant_row['active_sensor_count']
    else:
        dominant_activity = 'No Activity'
        dominant_sensor_count = 0

    segment_duration = seg_end - seg_start

    # Make sure both bounds are datetimes before reading their calendar fields
    seg_start_dt = pd.to_datetime(seg_start)
    seg_end_dt = pd.to_datetime(seg_end)

    # Cyclic encoding of the time of day: fractional hour (seconds ignored) on a 24 h circle
    start_hour_sin, start_hour_cos = _cyclic_pair(seg_start_dt.hour + seg_start_dt.minute / 60, 24)
    end_hour_sin, end_hour_cos = _cyclic_pair(seg_end_dt.hour + seg_end_dt.minute / 60, 24)

    # Cyclic encoding of the day of week: weekday() on a 7-day circle
    start_dow_sin, start_dow_cos = _cyclic_pair(seg_start_dt.weekday(), 7)
    end_dow_sin, end_dow_cos = _cyclic_pair(seg_end_dt.weekday(), 7)

    segment_data = {
        'segment_id': seg['segment_id'],
        'segment_duration': int(segment_duration.total_seconds()),  # truncated to whole seconds
        'dominant_sensor_count': dominant_sensor_count,
        'dominant_activity': dominant_activity,

        # Cyclic time features
        'start_hour_sin': start_hour_sin,
        'start_hour_cos': start_hour_cos,
        'end_hour_sin': end_hour_sin,
        'end_hour_cos': end_hour_cos,

        'start_dow_sin': start_dow_sin,
        'start_dow_cos': start_dow_cos,
        'end_dow_sin': end_dow_sin,
        'end_dow_cos': end_dow_cos,
    }

    # One column per sensor, in sensor_list order
    segment_data.update(sensor_redundancy)

    segment_activities.append(segment_data)

# Final DataFrame
segment_activities_df = pd.DataFrame(segment_activities)

# Order the rows by segment
segment_activities_df = segment_activities_df.sort_values(by=['segment_id'])

# Save to CSV
segment_activities_df.to_csv("segment_activities_cyclique.csv", index=False)

print(segment_activities_df)


       segment_id  segment_duration  dominant_sensor_count  \
0               0               470                      3   
1               1              3762                      3   
2               2                82                      4   
3               3               103                      4   
4               4                30                      4   
...           ...               ...                    ...   
53090       53090                30                      5   
53091       53091                 8                      2   
53092       53092                 3                      2   
53093       53093                13                      3   
53094       53094                30                      3   

             dominant_activity  start_hour_sin  start_hour_cos  end_hour_sin  \
0                        Sleep        0.004363        0.999990      0.034899   
1                        Sleep        0.104528        0.994522      0.370557   
2              

In [7]:
# Rebuild the per-segment table from the collected records
segment_activities_df = pd.DataFrame(segment_activities)

# Move the label column 'dominant_activity' to the last position
label_col = 'dominant_activity'
feature_cols = [name for name in segment_activities_df.columns if name != label_col]
segment_activities_df = segment_activities_df[feature_cols + [label_col]]


In [8]:
# Write the reordered table to CSV (without the index column).
# NOTE(review): the cyclic-feature table lands in "segment_activities_finale.csv",
# while its un-reordered version was written to "segment_activities_cyclique.csv";
# confirm that overwriting the "finale" file is intended.
output_path = "segment_activities_finale.csv"
segment_activities_df.to_csv(output_path, index=False)

# Show the first 20 rows with every column visible
first_rows = segment_activities_df.head(20)
print(first_rows.to_string())

    segment_id  segment_duration  dominant_sensor_count  start_hour_sin  start_hour_cos  end_hour_sin  end_hour_cos  start_dow_sin  start_dow_cos  end_dow_sin  end_dow_cos  D001  D002  D003  M001  M002  M003  M004  M005  M006  M007  M008  M009  M010  M011  M012  M013  M014  M015  M016  M017  M018  M019  M020  M021  M022  M023  M024  M025  M026  M027  M028 dominant_activity
0            0               470                      3        0.004363        0.999990      0.034899      0.999391      -0.433884      -0.900969    -0.433884    -0.900969     0     0     0     0     0     0     0     0     0     0     0     1     0     0     0     0     0     0     0     0     0     1     1     0     0     0     0     0     0     0     0             Sleep
1            1              3762                      3        0.104528        0.994522      0.370557      0.928810      -0.433884      -0.900969    -0.433884    -0.900969     0     0     0     0     0     0     0     0     0     0     0     0     

In [1]:
import pandas as pd
# Reload the saved segment table and export its first 30 rows as a small preview file
df = pd.read_csv("segment_activities_finale.csv")
preview_rows = df.iloc[:30]
preview_rows.to_csv("donnees_apercu.csv", index=False)


In [1]:
#tableau demandé

import pandas as pd

# Load the annotated sensor log and parse its 'timestamp' column into datetimes
df = pd.read_csv("annotated_sensor_data.csv")
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

# Timestamps of every "ON"/"OPEN" event, sorted once so that each activity
# window is counted with two binary searches instead of a full scan of df.
active_event_times = (
    df.loc[df["status"].isin(["ON", "OPEN"]), "timestamp"]
    .dropna()
    .sort_values()
    .reset_index(drop=True)
)


def _count_active_events(window_start, window_end):
    """Count "ON"/"OPEN" events with window_start < timestamp < window_end.

    Returns 0 when either bound is missing or when the window is empty or
    inverted, which matches what a direct boolean comparison on df yields.
    """
    if pd.isna(window_start) or pd.isna(window_end):
        return 0
    below_end = active_event_times.searchsorted(window_end, side="left")
    up_to_start = active_event_times.searchsorted(window_start, side="right")
    return max(int(below_end - up_to_start), 0)


activities = []
current_activity = None
start_time = None

# Rows without an annotation never change the state below, so skip them up front.
annotated_rows = df.loc[df['activity'].notna(), ['timestamp', 'activity']]

for timestamp, label in annotated_rows.itertuples(index=False, name=None):
    if 'begin' in label:
        # A new "begin" simply restarts the window; an activity that was still
        # open at this point is dropped without being recorded.
        current_activity = label
        start_time = timestamp

    elif 'end' in label and current_activity is not None:
        activity_name = current_activity.replace(' begin', '')

        # Only an "end" whose name matches the open activity closes it;
        # a mismatched "end" marker is ignored.
        if activity_name == label.replace(' end', ''):
            activities.append({
                'activity': activity_name,
                'sensor_on_count': _count_active_events(start_time, timestamp)
            })

            current_activity = None
            start_time = None

# One row per recorded activity occurrence
activities_df = pd.DataFrame(activities)

# Per activity name: total and mean of 'sensor_on_count' over its occurrences
per_activity = activities_df.groupby('activity')
summary_df = per_activity.agg(
    total_events=pd.NamedAgg(column='sensor_on_count', aggfunc='sum'),
    average_events=pd.NamedAgg(column='sensor_on_count', aggfunc='mean'),
)
summary_df = summary_df.reset_index()

# Show the final table
print(summary_df)


                   activity  total_events  average_events
0             Bed_to_Toilet          4316        5.302211
1                    Chores         48980        6.278682
2             Desk_Activity          8327        9.080698
3        Dining_Rm_Activity          8720        2.834850
4                  Eve_Meds          2364        7.986486
5            Guest_Bathroom          3605        3.743510
6          Kitchen_Activity         42674       16.559565
7                Leave_Home         11122        6.999371
8   Master_Bedroom_Activity          8907       10.955720
9                  Meditate           624        3.151515
10             Morning_Meds          7344       19.326316
11                     Read          1852        2.967949
12                    Sleep         16643       37.232662
13                 Watch_TV          6532        3.416318
