In [1]:
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
import numpy as np
import pandas as pd

In [4]:
# Load the exported records from disk into a DataFrame.
data = pd.read_csv("exported.csv")

# Build the clustering feature matrix from the timestamp and room-number columns.
# NOTE(review): no conversion is performed here; the printed preview shows
# `datetime` already holding integer epoch-style values, so the conversion
# presumably happened when the CSV was exported -- confirm against that step.
X = data.loc[:, ["datetime", "room"]]

# Show the first five rows of the feature matrix as a sanity check.
print(X.head(n=5))

     datetime  room
0  1625629952   3.0
1  1625629952   3.0
2  1625633520   3.0
3  1625648400   3.0
4  1625648400   4.0


In [14]:
# Candidate DBSCAN neighbourhood radii: 0.10 up to (but excluding) 2.00 in 0.01 steps.
# NOTE(review): eps is measured in the raw units of X, so the same radius is applied
# to differences in `datetime` and in `room`; consider scaling the features before
# trusting the selected value.
epsilon_range = np.arange(0.1, 2, 0.01)

# Best epsilon found so far and its mean silhouette score. -1 is the lower bound
# of the silhouette coefficient, so it serves as the "nothing found yet" sentinel.
optimal_epsilon = None
max_silhouette_score = -1

# Fit DBSCAN once per candidate radius and keep the radius with the highest score.
n_samples = len(X)
for epsilon in epsilon_range:
    dbscan = DBSCAN(eps=epsilon, min_samples=2).fit(X)
    labels = dbscan.labels_

    # silhouette_score is only defined for 2 <= n_labels <= n_samples - 1 and raises
    # ValueError otherwise (e.g. every point is noise, or everything falls into one
    # cluster). Skip such candidates instead of aborting the whole sweep.
    n_labels = len(np.unique(labels))
    if not 2 <= n_labels <= n_samples - 1:
        continue

    # NOTE(review): DBSCAN labels noise points as -1 and silhouette_score treats
    # every distinct label as a cluster, so noise is scored as if it were one
    # cluster; decide whether noise should be excluded before scoring.
    silhouette_avg = silhouette_score(X, labels)

    # Keep this candidate only if it strictly improves on the best score so far,
    # so ties are resolved in favour of the smallest epsilon.
    if silhouette_avg > max_silhouette_score:
        max_silhouette_score = silhouette_avg
        optimal_epsilon = epsilon

# Report the winning epsilon and its score, or say so if nothing could be scored.
if optimal_epsilon is None:
    print("No epsilon in the tested range produced a clustering that can be scored.")
else:
    print("Optimal epsilon:", optimal_epsilon)
    print("Silhouette score:", max_silhouette_score)

Optimal epsilon: 1.0099999999999996
Silhouette score: 0.9992097772263954
