# In [1]:
# importing necessary libraries

from sklearn.cluster import DBSCAN
import numpy as np
import pandas as pd

# Read the raw sensor readings from the CSV file.
data = pd.read_csv('D:/Time Series Sentics/test_Data_1.csv')

# Parse the timestamp column into datetime values, then order the rows
# chronologically so the per-timestamp grouping below walks forward in time.
data = data.assign(timestamp_id=pd.to_datetime(data['timestamp_id']))
data_sorted = data.sort_values('timestamp_id')

# Accumulator for the fused rows; each entry is
# [f_timestamp, f_id, cluster_data, f_u_id].
processed_data = []

# DBSCAN parameters:
#   epsilon     - distance threshold for clustering (2 meters)
#   min_samples - minimum number of samples required to form a cluster
epsilon, min_samples = 2, 1

# Fuse the detections of each timestamp: readings recorded at the same instant
# are clustered on (x_position, y_position) and every cluster becomes one row
# of `processed_data`.

# The estimator's configuration never changes, so build it once and simply
# re-fit it for every timestamp group.
dbscan = DBSCAN(eps=epsilon, min_samples=min_samples, metric='euclidean')

# Sequential f_id counter. Drawing ids with np.random.randint(1000, 9999)
# samples with replacement from only 8999 values, so distinct clusters could
# receive the same f_id and the output differed from run to run. A counter
# keeps ids unique and reproducible (values pass 9999 once more than 9000
# clusters have been emitted).
next_f_id = 1000

for timestamp, group in data_sorted.groupby('timestamp_id'):
    # Extracting positions
    positions = group[['x_position', 'y_position']].to_numpy()

    # Applying DBSCAN clustering
    cluster_labels = dbscan.fit_predict(positions)

    # Identical for every cluster of this timestamp, so format it once.
    f_timestamp = timestamp.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    # Processing each cluster.
    # NOTE: with min_samples = 1 every point is a core point, so DBSCAN never
    # returns the noise label -1. If min_samples is raised, all noise points of
    # a timestamp would be fused into a single row here regardless of distance.
    for cluster_id in np.unique(cluster_labels):
        cluster_mask = cluster_labels == cluster_id
        cluster = group[cluster_mask]

        # Determining the common unique_id: keep it only when exactly one known
        # id is present in the cluster. Missing values (NaN) and 0 both count as
        # "unknown"; NaN has to be dropped explicitly because NaN != 0 is True.
        unique_ids = cluster['unique_id'].dropna().unique()
        unique_ids = unique_ids[unique_ids != 0]
        if len(unique_ids) == 1:
            f_u_id = unique_ids[0]
        else:
            # No known id, or several conflicting ids: -1 marks a new or
            # unresolved id.
            f_u_id = -1

        # Preparing fused data entry
        f_id = next_f_id
        next_f_id += 1
        cluster_data = cluster[['x_position', 'y_position', 'sensor_id']].values.tolist()

        processed_data.append([f_timestamp, f_id, cluster_data, f_u_id])

# Converting processed data to a DataFrame: one row per fused cluster.
columns = ['f_timestamp', 'f_id', 'cluster_data', 'f_u_id']
processed_df = pd.DataFrame(processed_data, columns=columns)

# Displaying first 5 rows. A bare `processed_df.head()` is only rendered when
# it is the last expression of a notebook cell, so print it explicitly to get
# the preview both in a notebook and when run as a script.
print(processed_df.head())


# Creating a new csv file for the output (the DataFrame index is not written).
output_path = 'D:/Time Series Sentics/output.csv'
processed_df.to_csv(output_path, index=False)